Diffstat (limited to 'uploader/phenotypes')
-rw-r--r-- | uploader/phenotypes/__init__.py |    2
-rw-r--r-- | uploader/phenotypes/misc.py     |   26
-rw-r--r-- | uploader/phenotypes/models.py   |  396
-rw-r--r-- | uploader/phenotypes/views.py    | 1200
4 files changed, 1624 insertions, 0 deletions
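Note: the new package's __init__.py exposes the Flask blueprint 'phenotypesbp' as its public entry point. A minimal registration sketch follows; the create_app factory and the "/species" URL prefix are illustrative assumptions, not part of this change:

    # Hypothetical wiring; only the blueprint import comes from this diff.
    from flask import Flask
    from uploader.phenotypes import phenotypesbp

    def create_app() -> Flask:
        app = Flask(__name__)
        # The blueprint's routes are relative (e.g. "<int:species_id>/populations/..."),
        # so the mount point below is an assumed choice.
        app.register_blueprint(phenotypesbp, url_prefix="/species")
        return app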
diff --git a/uploader/phenotypes/__init__.py b/uploader/phenotypes/__init__.py new file mode 100644 index 0000000..c17d32c --- /dev/null +++ b/uploader/phenotypes/__init__.py @@ -0,0 +1,2 @@ +"""Package for handling ('classical') phenotype data""" +from .views import phenotypesbp diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py new file mode 100644 index 0000000..cbe3b7f --- /dev/null +++ b/uploader/phenotypes/misc.py @@ -0,0 +1,26 @@ +"""Miscellaneous functions handling phenotypes and phenotypes data.""" +import logging + +logger = logging.getLogger(__name__) + + +def phenotypes_data_differences( + filedata: tuple[dict, ...], dbdata: tuple[dict, ...] +) -> tuple[dict, ...]: + """Compute differences between file data and db data""" + diff = tuple() + for filerow, dbrow in zip( + sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])), + sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))): + for samplename, value in filerow["data"].items(): + if value != dbrow["data"].get(samplename, {}).get("value"): + diff = diff + ({ + "PhenotypeId": filerow["phenotype_id"], + "xref_id": filerow["xref_id"], + "DataId": dbrow["DataId"], + "StrainId": dbrow["data"].get(samplename, {}).get("StrainId"), + "StrainName": samplename, + "value": value + },) + + return diff diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py new file mode 100644 index 0000000..c2aeebf --- /dev/null +++ b/uploader/phenotypes/models.py @@ -0,0 +1,396 @@ +"""Database and utility functions for phenotypes.""" +import logging +import tempfile +from pathlib import Path +from functools import reduce +from datetime import datetime +from typing import Optional, Iterable + +import MySQLdb as mdb +from MySQLdb.cursors import Cursor, DictCursor + +from functional_tools import take +from gn_libs.mysqldb import debug_query + +logger = logging.getLogger(__name__) + + +__PHENO_DATA_TABLES__ = { + "PublishData": { + "table": "PublishData", "valueCol": "value", "DataIdCol": "Id"}, + "PublishSE": { + "table": "PublishSE", "valueCol": "error", "DataIdCol": "DataId"}, + "NStrain": { + "table": "NStrain", "valueCol": "count", "DataIdCol": "DataId"} +} + + +def datasets_by_population( + conn: mdb.Connection, + species_id: int, + population_id: int +) -> tuple[dict, ...]: + """Retrieve all of a population's phenotype studies.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT s.SpeciesId, pf.* FROM Species AS s " + "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId " + "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId " + "WHERE s.Id=%s AND iset.Id=%s;", + (species_id, population_id)) + return tuple(dict(row) for row in cursor.fetchall()) + + +def dataset_by_id(conn: mdb.Connection, + species_id: int, + population_id: int, + dataset_id: int) -> dict: + """Fetch dataset details by identifier""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT Species.SpeciesId, PublishFreeze.* FROM Species " + "INNER JOIN InbredSet ON Species.Id=InbredSet.SpeciesId " + "INNER JOIN PublishFreeze ON InbredSet.Id=PublishFreeze.InbredSetId " + "WHERE Species.Id=%s AND InbredSet.Id=%s AND PublishFreeze.Id=%s", + (species_id, population_id, dataset_id)) + return dict(cursor.fetchone()) + + +def phenotypes_count(conn: mdb.Connection, + population_id: int, + dataset_id: int) -> int: + """Count the number of phenotypes in the dataset.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT 
COUNT(*) AS total_phenos FROM Phenotype AS pheno " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId " + "WHERE pxr.InbredSetId=%s AND pf.Id=%s", + (population_id, dataset_id)) + return int(cursor.fetchone()["total_phenos"]) + + +def phenotype_publication_data(conn, phenotype_id) -> Optional[dict]: + """Retrieve the publication data for a phenotype if it exists.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT DISTINCT pxr.PhenotypeId, pub.* FROM PublishXRef AS pxr " + "INNER JOIN Publication as pub ON pxr.PublicationId=pub.Id " + "WHERE pxr.PhenotypeId=%s", + (phenotype_id,)) + res = cursor.fetchone() + if res is None: + return res + return dict(res) + + +def dataset_phenotypes(conn: mdb.Connection, + population_id: int, + dataset_id: int, + offset: int = 0, + limit: Optional[int] = None) -> tuple[dict, ...]: + """Fetch the actual phenotypes.""" + _query = ( + "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, ist.InbredSetCode FROM Phenotype AS pheno " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id " + "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( + f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(_query, (population_id, dataset_id)) + debug_query(cursor, logger) + return tuple(dict(row) for row in cursor.fetchall()) + + +def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids): + """Fetch standard-error values (if they exist) for a phenotype.""" + paramstr = ", ".join(["(%s, %s)"] * len(dataids_and_strainids)) + flat = tuple(item for sublist in dataids_and_strainids for item in sublist) + cursor.execute("SELECT * FROM PublishSE WHERE (DataId, StrainId) IN " + f"({paramstr})", + flat) + debug_query(cursor, logger) + _se = { + (row["DataId"], row["StrainId"]): { + "DataId": row["DataId"], + "StrainId": row["StrainId"], + "error": row["error"] + } + for row in cursor.fetchall() + } + + cursor.execute("SELECT * FROM NStrain WHERE (DataId, StrainId) IN " + f"({paramstr})", + flat) + debug_query(cursor, logger) + _n = { + (row["DataId"], row["StrainId"]): { + "DataId": row["DataId"], + "StrainId": row["StrainId"], + "count": row["count"] + } + for row in cursor.fetchall() + } + + keys = set(tuple(_se.keys()) + tuple(_n.keys())) + return { + key: {"xref_id": xref_id, **_se.get(key,{}), **_n.get(key,{})} + for key in keys + } + +def __organise_by_phenotype__(pheno, row): + """Organise disparate data rows into phenotype 'objects'.""" + _pheno = pheno.get(row["Id"]) + return { + **pheno, + row["Id"]: { + "Id": row["Id"], + "Pre_publication_description": row["Pre_publication_description"], + "Post_publication_description": row["Post_publication_description"], + "Original_description": row["Original_description"], + "Units": row["Units"], + "Pre_publication_abbreviation": row["Pre_publication_abbreviation"], + "Post_publication_abbreviation": row["Post_publication_abbreviation"], + "xref_id": row["pxr.Id"], + "DataId": row["DataId"], + "data": { + **(_pheno["data"] if bool(_pheno) else {}), + (row["DataId"], row["StrainId"]): { + "DataId": row["DataId"], + "StrainId": row["StrainId"], + "mean": row["mean"], + "Locus": row["Locus"], + "LRS": row["LRS"], + "additive": row["additive"], + "Sequence": row["Sequence"], + "comments": row["comments"], + 
"value": row["value"], + "StrainName": row["Name"], + "StrainName2": row["Name2"], + "StrainSymbol": row["Symbol"], + "StrainAlias": row["Alias"] + } + } + } + } + + +def __merge_pheno_data_and_se__(data, sedata) -> dict: + """Merge phenotype data with the standard errors.""" + return { + key: {**value, **sedata.get(key, {})} + for key, value in data.items() + } + + +def phenotype_by_id( + conn: mdb.Connection, + species_id: int, + population_id: int, + dataset_id: int, + xref_id +) -> Optional[dict]: + """Fetch a specific phenotype.""" + _dataquery = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " + "FROM Phenotype AS pheno " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " + "INNER JOIN Strain AS str ON pd.StrainId=str.Id " + "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " + "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " + "WHERE " + "(str.SpeciesId, pxr.InbredSetId, pf.Id, pxr.Id)=(%s, %s, %s, %s)") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(_dataquery, + (species_id, population_id, dataset_id, xref_id)) + _pheno: dict = reduce(__organise_by_phenotype__, cursor.fetchall(), {}) + if bool(_pheno) and len(_pheno.keys()) == 1: + _pheno = tuple(_pheno.values())[0] + return { + **_pheno, + "data": tuple(__merge_pheno_data_and_se__( + _pheno["data"], + __phenotype_se__( + cursor, xref_id, tuple(_pheno["data"].keys())) + ).values()) + } + if bool(_pheno) and len(_pheno.keys()) > 1: + raise Exception( + "We found more than one phenotype with the same identifier!") + + return None + + +def phenotypes_data(conn: mdb.Connection, + population_id: int, + dataset_id: int, + offset: int = 0, + limit: Optional[int] = None) -> tuple[dict, ...]: + """Fetch the data for the phenotypes.""" + # — Phenotype -> PublishXRef -> PublishData -> Strain -> StrainXRef -> PublishFreeze + _query = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " + "FROM Phenotype AS pheno " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " + "INNER JOIN Strain AS str ON pd.StrainId=str.Id " + "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " + "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " + "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( + f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(_query, (population_id, dataset_id)) + debug_query(cursor, logger) + return tuple(dict(row) for row in cursor.fetchall()) + + +def save_new_dataset(cursor: Cursor, + population_id: int, + dataset_name: str, + dataset_fullname: str, + dataset_shortname: str) -> dict: + """Create a new phenotype dataset.""" + params = { + "population_id": population_id, + "dataset_name": dataset_name, + "dataset_fullname": dataset_fullname, + "dataset_shortname": dataset_shortname, + "created": datetime.now().date().isoformat(), + "public": 2, + "confidentiality": 0, + "users": None + } + cursor.execute( + "INSERT INTO PublishFreeze(Name, FullName, ShortName, CreateTime, " + "public, InbredSetId, confidentiality, AuthorisedUsers) " + "VALUES(%(dataset_name)s, %(dataset_fullname)s, %(dataset_shortname)s, " + "%(created)s, %(public)s, %(population_id)s, %(confidentiality)s, " + "%(users)s)", + params) + debug_query(cursor, 
logger) + return {**params, "Id": cursor.lastrowid} + + +def phenotypes_data_by_ids( + conn: mdb.Connection, + inbred_pheno_xref: dict[str, int] +) -> tuple[dict, ...]: + """Fetch all phenotype data, filtered by the `inbred_pheno_xref` mapping.""" + _paramstr = ",".join(["(%s, %s, %s)"] * len(inbred_pheno_xref)) + _query = ("SELECT " + "pub.PubMed_ID, pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " + "FROM Publication AS pub " + "RIGHT JOIN PublishXRef AS pxr0 ON pub.Id=pxr0.PublicationId " + "INNER JOIN Phenotype AS pheno ON pxr0.PhenotypeId=pheno.id " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " + "INNER JOIN Strain AS str ON pd.StrainId=str.Id " + "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " + "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " + f"WHERE (pxr.InbredSetId, pheno.Id, pxr.Id) IN ({_paramstr}) " + "ORDER BY pheno.Id") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(_query, tuple(item for row in inbred_pheno_xref + for item in (row["population_id"], + row["phenoid"], + row["xref_id"]))) + debug_query(cursor, logger) + return tuple( + reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values()) + + +def create_new_phenotypes(conn: mdb.Connection, + phenotypes: Iterable[dict]) -> tuple[dict, ...]: + """Add entirely new phenotypes to the database.""" + _phenos = tuple() + with conn.cursor(cursorclass=DictCursor) as cursor: + while True: + batch = take(phenotypes, 1000) + if len(batch) == 0: + break + + cursor.executemany( + ("INSERT INTO " + "Phenotype(Pre_publication_description, Original_description, Units, Authorized_Users) " + "VALUES (%s, %s, %s, 'robwilliams')"), + tuple((row["id"], row["description"], row["units"]) + for row in batch)) + paramstr = ", ".join(["%s"] * len(batch)) + cursor.execute( + "SELECT * FROM Phenotype WHERE Pre_publication_description IN " + f"({paramstr})", + tuple(item["id"] for item in batch)) + _phenos = _phenos + tuple({ + "phenotype_id": row["Id"], + "id": row["Pre_publication_description"], + "description": row["Original_description"], + "units": row["Units"] + } for row in cursor.fetchall()) + + return _phenos + + +def save_phenotypes_data( + conn: mdb.Connection, + table: str, + data: Iterable[dict] +) -> int: + """Save new phenotypes data into the database.""" + _table_details = __PHENO_DATA_TABLES__[table] + with conn.cursor(cursorclass=DictCursor) as cursor: + _count = 0 + while True: + batch = take(data, 100000) + if len(batch) == 0: + logger.warning("Got an empty batch. 
This needs investigation.") + break + + logger.debug("Saving batch of %s items.", len(batch)) + cursor.executemany( + (f"INSERT INTO {_table_details['table']}" + f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) " + "VALUES " + f"(%(data_id)s, %(sample_id)s, %(value)s) "), + tuple(batch)) + debug_query(cursor, logger) + _count = _count + len(batch) + + + logger.debug("Saved a total of %s data rows", _count) + return _count + + +def quick_save_phenotypes_data( + conn: mdb.Connection, + table: str, + dataitems: Iterable[dict], + tmpdir: Path +) -> int: + """Save data items to the database, but using 'LOAD DATA LOCAL INFILE'.""" + _table_details = __PHENO_DATA_TABLES__[table] + with (tempfile.NamedTemporaryFile( + prefix=f"{table}_data", mode="wt", dir=tmpdir) as tmpfile, + conn.cursor(cursorclass=DictCursor) as cursor): + _count = 0 + logger.debug("Write data rows to text file.") + for row in dataitems: + tmpfile.write( + f'{row["data_id"]}\t{row["sample_id"]}\t{row["value"]}\n') + _count = _count + 1 + tmpfile.flush() + + logger.debug("Load text file into database (table: %s)", + _table_details["table"]) + cursor.execute( + f"LOAD DATA LOCAL INFILE '{tmpfile.name}' " + f"INTO TABLE {_table_details['table']} " + "(" + f"{_table_details['DataIdCol']}, " + "StrainId, " + f"{_table_details['valueCol']}" + ")") + debug_query(cursor, logger) + return _count diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py new file mode 100644 index 0000000..bc15f2d --- /dev/null +++ b/uploader/phenotypes/views.py @@ -0,0 +1,1200 @@ +"""Views handling ('classical') phenotypes.""" +import sys +import csv +import uuid +import json +import logging +import tempfile +from typing import Any +from pathlib import Path +from zipfile import ZipFile +from functools import wraps, reduce +from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING +from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode + +import datetime +from datetime import timedelta + +from redis import Redis +from pymonad.either import Left +from requests.models import Response +from MySQLdb.cursors import DictCursor +from werkzeug.utils import secure_filename + +from gn_libs import sqlite3 +from gn_libs import jobs as gnlibs_jobs +from gn_libs.jobs.jobs import JobNotFound +from gn_libs.mysqldb import database_connection +from gn_libs import monadic_requests as mrequests + +from authlib.jose import jwt +from flask import (flash, + request, + url_for, + jsonify, + redirect, + Blueprint, + send_file, + current_app as app) + +# from r_qtl import r_qtl2 as rqtl2 +from r_qtl import r_qtl2_qc as rqc +from r_qtl import exceptions as rqe + + +from uploader import jobs +from uploader import session +from uploader.files import save_file#, fullpath +from uploader.ui import make_template_renderer +from uploader.oauth2.client import oauth2_post +from uploader.authorisation import require_login +from uploader.oauth2 import jwks, client as oauth2client +from uploader.route_utils import generic_select_population +from uploader.datautils import safe_int, enumerate_sequence +from uploader.species.models import all_species, species_by_id +from uploader.monadic_requests import make_either_error_handler +from uploader.publications.models import fetch_publication_by_id +from uploader.request_checks import with_species, with_population +from uploader.samples.models import samples_by_species_and_population +from uploader.input_validation import (encode_errors, + decode_errors, + is_valid_representative_name) + +from .models 
import (dataset_by_id, + phenotype_by_id, + phenotypes_count, + save_new_dataset, + dataset_phenotypes, + datasets_by_population, + phenotypes_data_by_ids, + phenotype_publication_data) + +phenotypesbp = Blueprint("phenotypes", __name__) +render_template = make_template_renderer("phenotypes") + +_FAMILIES_WITH_SE_AND_N_ = ( + "Reference Populations (replicate average, SE, N)",) + +@phenotypesbp.route("/phenotypes", methods=["GET"]) +@require_login +def index(): + """Direct entry-point for phenotypes data handling.""" + with database_connection(app.config["SQL_URI"]) as conn: + if not bool(request.args.get("species_id")): + return render_template("phenotypes/index.html", + species=all_species(conn), + activelink="phenotypes") + + species_id = request.args.get("species_id") + if species_id == "CREATE-SPECIES": + return redirect(url_for( + "species.create_species", + return_to="species.populations.phenotypes.select_population")) + + species = species_by_id(conn, species_id) + if not bool(species): + flash("No such species!", "alert-danger") + return redirect(url_for("species.populations.phenotypes.index")) + return redirect(url_for("species.populations.phenotypes.select_population", + species_id=species["SpeciesId"])) + + +@phenotypesbp.route("<int:species_id>/phenotypes/select-population", + methods=["GET"]) +@require_login +@with_species(redirect_uri="species.populations.phenotypes.index") +def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument] + """Select the population for your phenotypes.""" + return generic_select_population( + species, + "phenotypes/select-population.html", + request.args.get("population_id") or "", + "species.populations.phenotypes.select_population", + "species.populations.phenotypes.list_datasets", + "phenotypes", + "No such population found!") + + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets", + methods=["GET"]) +@require_login +@with_population(species_redirect_uri="species.populations.phenotypes.index", + redirect_uri="species.populations.phenotypes.select_population") +def list_datasets(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument] + """List available phenotype datasets.""" + with database_connection(app.config["SQL_URI"]) as conn: + datasets = datasets_by_population( + conn, species["SpeciesId"], population["Id"]) + if len(datasets) == 1: + return redirect(url_for( + "species.populations.phenotypes.view_dataset", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=datasets[0]["Id"])) + return render_template("phenotypes/list-datasets.html", + species=species, + population=population, + datasets=datasets, + activelink="list-datasets") + + +def with_dataset( + species_redirect_uri: str, + population_redirect_uri: str, + redirect_uri: str +): + """Ensure the dataset actually exists.""" + def __decorator__(func): + @wraps(func) + @with_population(species_redirect_uri, population_redirect_uri) + def __with_dataset__(**kwargs): + try: + _spcid = int(kwargs["species_id"]) + _popid = int(kwargs["population_id"]) + _dsetid = int(kwargs.get("dataset_id")) + select_dataset_uri = redirect(url_for( + redirect_uri, species_id=_spcid, population_id=_popid)) + if not bool(_dsetid): + flash("You need to select a valid 'dataset_id' value.", + "alert-danger") + return select_dataset_uri + with database_connection(app.config["SQL_URI"]) as conn: + dataset = dataset_by_id(conn, _spcid, _popid, _dsetid) + if not bool(dataset): + flash("You 
must select a valid dataset.", + "alert-danger") + return select_dataset_uri + except ValueError as _verr: + app.logger.debug( + "Exception converting 'dataset_id' to integer: %s", + kwargs.get("dataset_id"), + exc_info=True) + flash("Expected 'dataset_id' value to be an integer.", + "alert-danger") + return select_dataset_uri + return func(dataset=dataset, **kwargs) + return __with_dataset__ + return __decorator__ + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/view", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def view_dataset(# pylint: disable=[unused-argument] + species: dict, population: dict, dataset: dict, **kwargs): + """View a specific dataset""" + with database_connection(app.config["SQL_URI"]) as conn: + dataset = dataset_by_id( + conn, species["SpeciesId"], population["Id"], dataset["Id"]) + if not bool(dataset): + flash("Could not find such a phenotype dataset!", "alert-danger") + return redirect(url_for( + "species.populations.phenotypes.list_datasets", + species_id=species["SpeciesId"], + population_id=population["Id"])) + + start_at = max(safe_int(request.args.get("start_at") or 0), 0) + count = int(request.args.get("count") or 20) + return render_template("phenotypes/view-dataset.html", + species=species, + population=population, + dataset=dataset, + phenotype_count=phenotypes_count( + conn, population["Id"], dataset["Id"]), + phenotypes=enumerate_sequence( + dataset_phenotypes( + conn, + population["Id"], + dataset["Id"])), + start_from=start_at, + count=count, + activelink="view-dataset") + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/phenotype/<xref_id>", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def view_phenotype(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + xref_id: int, + **kwargs +): + """View an individual phenotype from the dataset.""" + def __render__(privileges): + phenotype = phenotype_by_id(conn, + species["SpeciesId"], + population["Id"], + dataset["Id"], + xref_id) + def __non_empty__(value) -> bool: + if isinstance(value, str): + return value.strip() != "" + return bool(value) + + return render_template( + "phenotypes/view-phenotype.html", + species=species, + population=population, + dataset=dataset, + xref_id=xref_id, + phenotype=phenotype, + has_se=any(bool(item.get("error")) for item in phenotype["data"]), + publish_data={ + key.replace("_", " "): val + for key,val in + (phenotype_publication_data(conn, phenotype["Id"]) or {}).items() + if (key in ("PubMed_ID", "Authors", "Title", "Journal") + and __non_empty__(val)) + }, + privileges=(privileges + ### For demo! Do not commit this part + + ("group:resource:edit-resource", + "group:resource:delete-resource",) + ### END: For demo! 
Do not commit this part + ), + activelink="view-phenotype") + + def __fail__(error): + if isinstance(error, Response) and error.json() == "No linked resource!": + return __render__(tuple()) + return make_either_error_handler( + "There was an error fetching the roles and privileges.")(error) + + with database_connection(app.config["SQL_URI"]) as conn: + return oauth2_post( + "/auth/resource/phenotypes/individual/linked-resource", + json={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "xref_id": xref_id + } + ).then( + lambda resource: tuple( + privilege["privilege_id"] for role in resource["roles"] + for privilege in role["privileges"]) + ).then(__render__).either(__fail__, lambda resp: resp) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets/create", + methods=["GET", "POST"]) +@require_login +@with_population( + species_redirect_uri="species.populations.phenotypes.index", + redirect_uri="species.populations.phenotypes.select_population") +def create_dataset(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument] + """Create a new phenotype dataset.""" + with (database_connection(app.config["SQL_URI"]) as conn, + conn.cursor(cursorclass=DictCursor) as cursor): + if request.method == "GET": + return render_template("phenotypes/create-dataset.html", + activelink="create-dataset", + species=species, + population=population, + **decode_errors( + request.args.get("error_values", ""))) + + form = request.form + _errors: tuple[tuple[str, str], ...] = tuple() + if not is_valid_representative_name( + (form.get("dataset-name") or "").strip()): + _errors = _errors + (("dataset-name", "Invalid dataset name."),) + + if not bool((form.get("dataset-fullname") or "").strip()): + _errors = _errors + (("dataset-fullname", + "You must provide a value for 'Full Name'."),) + + if bool(_errors) > 0: + return redirect(url_for( + "species.populations.phenotypes.create_dataset", + species_id=species["SpeciesId"], + population_id=population["Id"], + error_values=encode_errors(_errors, form))) + + dataset_shortname = ( + form["dataset-shortname"] or form["dataset-name"]).strip() + _pheno_dataset = save_new_dataset( + cursor, + population["Id"], + form["dataset-name"].strip(), + form["dataset-fullname"].strip(), + dataset_shortname) + return redirect(url_for("species.populations.phenotypes.list_datasets", + species_id=species["SpeciesId"], + population_id=population["Id"])) + + +def process_phenotypes_rqtl2_bundle(error_uri): + """Process phenotypes from the uploaded R/qtl2 bundle.""" + _redisuri = app.config["REDIS_URL"] + _sqluri = app.config["SQL_URI"] + try: + ## Handle huge files here... 
+ phenobundle = save_file(request.files["phenotypes-bundle"], + Path(app.config["UPLOAD_FOLDER"])) + rqc.validate_bundle(phenobundle) + return phenobundle + except AssertionError as _aerr: + app.logger.debug("File upload error!", exc_info=True) + flash("Expected a zipped bundle of files with phenotypes' " + "information.", + "alert-danger") + return error_uri + except rqe.RQTLError as rqtlerr: + app.logger.debug("Bundle validation error!", exc_info=True) + flash("R/qtl2 Error: " + " ".join(rqtlerr.args), "alert-danger") + return error_uri + + +def process_phenotypes_individual_files(error_uri): + """Process the uploaded individual files.""" + form = request.form + cdata = { + "sep": form["file-separator"], + "comment.char": form["file-comment-character"], + "na.strings": form["file-na"].split(" "), + } + bundlepath = Path(app.config["UPLOAD_FOLDER"], + f"{str(uuid.uuid4()).replace('-', '')}.zip") + with ZipFile(bundlepath,mode="w") as zfile: + for rqtlkey, formkey in (("phenocovar", "phenotype-descriptions"), + ("pheno", "phenotype-data"), + ("phenose", "phenotype-se"), + ("phenonum", "phenotype-n")): + cdata[f"{rqtlkey}_transposed"] = ( + (form.get(f"{formkey}-transposed") or "off") == "on") + + if form.get("resumable-upload", False): + # Chunked upload of large files was used + filedata = json.loads(form[formkey]) + zfile.write( + Path(app.config["UPLOAD_FOLDER"], filedata["uploaded-file"]), + arcname=filedata["original-name"]) + cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]] + else: + # TODO: Check this path: fix any bugs. + _sentfile = request.files[formkey] + if not bool(_sentfile): + flash(f"Expected file ('{formkey}') was not provided.", + "alert-danger") + return error_uri + + filepath = save_file( + _sentfile, Path(app.config["UPLOAD_FOLDER"]), hashed=False) + zfile.write( + Path(app.config["UPLOAD_FOLDER"], filepath), + arcname=filepath.name) + cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name] + + + zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2)) + + return bundlepath + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/add-phenotypes", + methods=["GET", "POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# pylint: disable=[unused-argument, too-many-locals] + """Add one or more phenotypes to the dataset.""" + use_bundle = request.args.get("use_bundle", "").lower() == "true" + add_phenos_uri = redirect(url_for( + "species.populations.phenotypes.add_phenotypes", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"])) + _redisuri = app.config["REDIS_URL"] + _sqluri = app.config["SQL_URI"] + with (Redis.from_url(_redisuri, decode_responses=True) as rconn, + # database_connection(_sqluri) as conn, + # conn.cursor(cursorclass=DictCursor) as cursor + ): + if request.method == "GET": + today = datetime.date.today() + return render_template( + ("phenotypes/add-phenotypes-with-rqtl2-bundle.html" + if use_bundle else "phenotypes/add-phenotypes-raw-files.html"), + species=species, + population=population, + dataset=dataset, + monthnames=( + "January", "February", "March", "April", + "May", "June", "July", "August", + "September", "October", "November", + "December"), 
+ current_month=today.strftime("%B"), + current_year=int(today.strftime("%Y")), + families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_, + use_bundle=use_bundle, + activelink="add-phenotypes") + + phenobundle = (process_phenotypes_rqtl2_bundle(add_phenos_uri) + if use_bundle else + process_phenotypes_individual_files(add_phenos_uri)) + + _jobid = uuid.uuid4() + _namespace = jobs.jobsnamespace() + _ttl_seconds = app.config["JOBS_TTL_SECONDS"] + _job = jobs.launch_job( + jobs.initialise_job( + rconn, + _namespace, + str(_jobid), + [sys.executable, "-m", "scripts.rqtl2.phenotypes_qc", _sqluri, + _redisuri, _namespace, str(_jobid), str(species["SpeciesId"]), + str(population["Id"]), + # str(dataset["Id"]), + str(phenobundle), + "--loglevel", + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower(), + "--redisexpiry", + str(_ttl_seconds)], "phenotype_qc", _ttl_seconds, + {"job-metadata": json.dumps({ + "speciesid": species["SpeciesId"], + "populationid": population["Id"], + "datasetid": dataset["Id"], + "bundle": str(phenobundle.absolute()), + **({"publicationid": request.form["publication-id"]} + if request.form.get("publication-id") else {})})}), + _redisuri, + f"{app.config['UPLOAD_FOLDER']}/job_errors") + + app.logger.debug("JOB DETAILS: %s", _job) + jobstatusuri = url_for("species.populations.phenotypes.job_status", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"], + job_id=str(_job["jobid"])) + return ((jsonify({ + "redirect-to": jobstatusuri, + "statuscode": 200, + "message": ("Follow the 'redirect-to' URI to see the state " + "of the quality-control job started for your " + "uploaded files.") + }), 200) + if request.form.get("resumable-upload", False) else + redirect(jobstatusuri)) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/job/<uuid:job_id>", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def job_status( + species: dict, + population: dict, + dataset: dict, + job_id: uuid.UUID, + **kwargs +):# pylint: disable=[unused-argument] + """Retrieve current status of a particular phenotype QC job.""" + with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + try: + job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id)) + except jobs.JobNotFound as _jnf: + job = None + return render_template("phenotypes/job-status.html", + species=species, + population=population, + dataset=dataset, + job_id=job_id, + job=job, + errors=jobs.job_errors( + rconn, jobs.jobsnamespace(), job['jobid']), + metadata=jobs.job_files_metadata( + rconn, jobs.jobsnamespace(), job['jobid']), + activelink="add-phenotypes") + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/job/<uuid:job_id>/review", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def review_job_data( + species: dict, + population: dict, + dataset: dict, + job_id: uuid.UUID, + **kwargs +):# pylint: disable=[unused-argument] + """Review data one more time before entering it into the database.""" + with 
(Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + database_connection(app.config["SQL_URI"]) as conn): + try: + job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id)) + except jobs.JobNotFound as _jnf: + job = None + + def __metadata_by_type__(by_type, item): + filetype = item[1]["filetype"] + return { + **by_type, + filetype: (by_type.get(filetype, tuple()) + + ({"filename": item[0], **item[1]},)) + } + metadata: dict[str, Any] = reduce( + __metadata_by_type__, + (jobs.job_files_metadata( + rconn, jobs.jobsnamespace(), job['jobid']) + if job else {}).items(), + {}) + + def __desc__(filetype): + match filetype: + case "phenocovar": + desc = "phenotypes" + case "pheno": + desc = "phenotypes data" + case "phenose": + desc = "phenotypes standard-errors" + case "phenonum": + desc = "phenotypes samples" + case _: + desc = f"unknown file type '{filetype}'." + + return desc + + def __summarise__(filetype, files): + return { + "filetype": filetype, + "number-of-files": len(files), + "total-data-rows": sum( + int(afile["linecount"]) - 1 for afile in files), + "description": __desc__(filetype) + } + + summary = { + filetype: __summarise__(filetype, meta) + for filetype,meta in metadata.items() + } + _job_metadata = json.loads(job["job-metadata"]) + return render_template("phenotypes/review-job-data.html", + species=species, + population=population, + dataset=dataset, + job_id=job_id, + job=job, + summary=summary, + publication=( + fetch_publication_by_id( + conn, int(_job_metadata["publicationid"])) + if _job_metadata.get("publicationid") + else None), + activelink="add-phenotypes") + + +def load_phenotypes_success_handler(job): + """Handle loading new phenotypes into the database successfully.""" + return redirect(url_for( + "species.populations.phenotypes.load_data_success", + species_id=job["metadata"]["species_id"], + population_id=job["metadata"]["population_id"], + dataset_id=job["metadata"]["dataset_id"], + job_id=job["job_id"])) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/load-data-to-database", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def load_data_to_database( + species: dict, + population: dict, + dataset: dict, + **kwargs +):# pylint: disable=[unused-argument] + """Load the data from the given QC job into the database.""" + jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + sqlite3.connection(jobs_db) as conn): + qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"]) + _meta = json.loads(qc_job["job-metadata"]) + load_job_id = uuid.uuid4() + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + command = [ + sys.executable, + "-u", + "-m", + "scripts.load_phenotypes_to_db", + app.config["SQL_URI"], + jobs_db, + str(load_job_id), + "--log-level", + _loglevel + ] + + def __handle_error__(resp): + return render_template("http-error.html", *resp.json()) + + def __handle_success__(load_job): + app.logger.debug("The phenotypes loading job: %s", load_job) + return redirect(url_for( + "background-jobs.job_status", job_id=load_job["job_id"])) + + issued = datetime.datetime.now() + jwtkey = jwks.newest_jwk_with_rotation( + jwks.jwks_directory(app, 
"UPLOADER_SECRETS"), + int(app.config["JWKS_ROTATION_AGE_DAYS"])) + + return mrequests.post( + urljoin(oauth2client.authserver_uri(), "auth/token"), + json={ + "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer", + "scope": oauth2client.SCOPE, + "assertion": jwt.encode( + header={ + "alg": "RS256", + "typ": "JWT", + "kid": jwtkey.as_dict()["kid"] + }, + payload={ + "iss": str(oauth2client.oauth2_clientid()), + "sub": str(session.user_details()["user_id"]), + "aud": urljoin(oauth2client.authserver_uri(), + "auth/token"), + # TODO: Update expiry time once fix is implemented in + # auth server. + "exp": (issued + timedelta(minutes=5)).timestamp(), + "nbf": int(issued.timestamp()), + "iat": int(issued.timestamp()), + "jti": str(uuid.uuid4()) + }, + key=jwtkey).decode("utf8"), + "client_id": oauth2client.oauth2_clientid() + } + ).then( + lambda token: gnlibs_jobs.initialise_job( + conn, + load_job_id, + command, + "load-new-phenotypes-data", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "bundle_file": _meta["bundle"], + "publication_id": _meta["publicationid"], + "authserver": oauth2client.authserver_uri(), + "token": token["access_token"], + "success_handler": ( + "uploader.phenotypes.views" + ".load_phenotypes_success_handler") + }) + ).then( + lambda job: gnlibs_jobs.launch_job( + job, + jobs_db, + Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + ).either(__handle_error__, __handle_success__) + + +def update_phenotype_metadata(conn, metadata: dict): + """Update a phenotype's basic metadata values.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM Phenotype WHERE Id=%(phenotype-id)s", + metadata) + res = { + **{ + _key: _val for _key,_val in { + key.lower().replace("_", "-"): value + for key, value in (cursor.fetchone() or {}).items() + }.items() + if _key in metadata.keys() + }, + "phenotype-id": metadata.get("phenotype-id") + } + if res == metadata: + return False + + cursor.execute( + "UPDATE Phenotype SET " + "Pre_publication_description=%(pre-publication-description)s, " + "Post_publication_description=%(post-publication-description)s, " + "Original_description=%(original-description)s, " + "Units=%(units)s, " + "Pre_publication_abbreviation=%(pre-publication-abbreviation)s, " + "Post_publication_abbreviation=%(post-publication-abbreviation)s " + "WHERE Id=%(phenotype-id)s", + metadata) + return cursor.rowcount + + +def update_phenotype_values(conn, values): + """Update a phenotype's data values.""" + with conn.cursor() as cursor: + cursor.executemany( + "UPDATE PublishData SET value=%(new)s " + "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s", + tuple(item for item in values if item["new"] is not None)) + cursor.executemany( + "DELETE FROM PublishData " + "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s", + tuple(item for item in values if item["new"] is None)) + return len(values) + return 0 + + +def update_phenotype_se(conn, serrs): + """Update a phenotype's standard-error values.""" + with conn.cursor() as cursor: + cursor.executemany( + "INSERT INTO PublishSE(DataId, StrainId, error) " + "VALUES(%(data_id)s, %(strain_id)s, %(new)s) " + "ON DUPLICATE KEY UPDATE error=VALUES(error)", + tuple(item for item in serrs if item["new"] is not None)) + cursor.executemany( + "DELETE FROM PublishSE " + "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s", + tuple(item for item in serrs if item["new"] is 
None)) + return len(serrs) + return 0 + + +def update_phenotype_n(conn, counts): + """Update a phenotype's strain counts.""" + with conn.cursor() as cursor: + cursor.executemany( + "INSERT INTO NStrain(DataId, StrainId, count) " + "VALUES(%(data_id)s, %(strain_id)s, %(new)s) " + "ON DUPLICATE KEY UPDATE count=VALUES(count)", + tuple(item for item in counts if item["new"] is not None)) + cursor.executemany( + "DELETE FROM NStrain " + "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s", + tuple(item for item in counts if item["new"] is None)) + return len(counts) + + return 0 + + +def update_phenotype_data(conn, data: dict): + """Update the numeric data for a phenotype.""" + def __organise_by_dataid_and_strainid__(acc, current): + _key, dataid, strainid = current[0].split("::") + _keysrc, _keytype = _key.split("-") + newkey = f"{dataid}::{strainid}" + newitem = acc.get(newkey, {}) + newitem[_keysrc] = newitem.get(_keysrc, {}) + newitem[_keysrc][_keytype] = current[1] + return {**acc, newkey: newitem} + + def __separate_items__(acc, row): + key, val = row + return ({ + **acc[0], + key: { + **val["value"], + "changed?": (not val["value"]["new"] == val["value"]["original"]) + } + }, { + **acc[1], + key: { + **val["se"], + "changed?": (not val["se"]["new"] == val["se"]["original"]) + } + },{ + **acc[2], + key: { + **val["n"], + "changed?": (not val["n"]["new"] == val["n"]["original"]) + } + }) + + values, serrs, counts = tuple( + tuple({ + "data_id": row[0].split("::")[0], + "strain_id": row[0].split("::")[1], + "new": row[1]["new"] + } for row in item) + for item in ( + filter(lambda val: val[1]["changed?"], item.items())# type: ignore[arg-type] + for item in reduce(# type: ignore[var-annotated] + __separate_items__, + reduce(__organise_by_dataid_and_strainid__, + data.items(), + {}).items(), + ({}, {}, {})))) + + return (update_phenotype_values(conn, values), + update_phenotype_se(conn, serrs), + update_phenotype_n(conn, counts)) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/phenotype/<int:xref_id>/edit", + methods=["GET", "POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def edit_phenotype_data(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + xref_id: int, + **kwargs +): + """Edit the data for a particular phenotype.""" + def __render__(**kwargs): + processed_kwargs = { + **kwargs, + "privileges": (kwargs.get("privileges", tuple()) + ### For demo! Do not commit this part + + ("group:resource:edit-resource", + "group:resource:delete-resource",) + ### END: For demo! 
Do not commit this part + ) + } + return render_template( + "phenotypes/edit-phenotype.html", + species=species, + population=population, + dataset=dataset, + xref_id=xref_id, + families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_, + **processed_kwargs, + activelink="edit-phenotype") + + with database_connection(app.config["SQL_URI"]) as conn: + if request.method == "GET": + def __fetch_phenotype__(privileges): + phenotype = phenotype_by_id(conn, + species["SpeciesId"], + population["Id"], + dataset["Id"], + xref_id) + if phenotype is None: + msg = ("Could not find the phenotype with cross-reference ID" + f" '{xref_id}' from dataset '{dataset['FullName']}' " + f" from the '{population['FullName']}' population of " + f" species '{species['FullName']}'.") + return Left({"privileges": privileges, "phenotype-error": msg}) + return {"privileges": privileges, "phenotype": phenotype} + + def __fetch_publication_data__(**kwargs): + pheno = kwargs["phenotype"] + return { + **kwargs, + "publication_data": phenotype_publication_data( + conn, pheno["Id"]) + } + + def __fail__(failure_object): + # process the object + return __render__(failure_object=failure_object) + + return oauth2_post( + "/auth/resource/phenotypes/individual/linked-resource", + json={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "xref_id": xref_id + } + ).then( + lambda resource: tuple( + privilege["privilege_id"] for role in resource["roles"] + for privilege in role["privileges"]) + ).then( + __fetch_phenotype__ + ).then( + lambda args: __fetch_publication_data__(**args) + ).either(__fail__, lambda args: __render__(**args)) + + ## POST + _change = False + match request.form.get("submit", "invalid-action"): + case "update basic metadata": + _change = update_phenotype_metadata(conn, { + key: value.strip() if bool(value.strip()) else None + for key, value in request.form.items() + if key not in ("submit",) + }) + msg = "Basic metadata was updated successfully." 
+ case "update data": + _update = update_phenotype_data(conn, { + key: value.strip() if bool(value.strip()) else None + for key, value in request.form.items() + if key not in ("submit",) + }) + msg = (f"{_update[0]} value rows, {_update[1]} standard-error " + f"rows and {_update[2]} 'N' rows were updated.") + _change = any(item != 0 for item in _update) + case "update publication": + flash("NOT IMPLEMENTED: Would update publication data.", "alert-success") + case _: + flash("Invalid phenotype editing action.", "alert-danger") + + if _change: + flash(msg, "alert-success") + return redirect(url_for( + "species.populations.phenotypes.view_phenotype", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"], + xref_id=xref_id)) + + flash("No change was made by the user.", "alert-info") + return redirect(url_for( + "species.populations.phenotypes.edit_phenotype_data", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"], + xref_id=xref_id)) + + +def process_phenotype_data_for_download(pheno: dict) -> dict: + """Sanitise data for download.""" + return { + "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}", + **{ + key: val for key, val in pheno.items() + if key not in ("Id", "xref_id", "data", "Units") + }, + **{ + data_item["StrainName"]: data_item["value"] + for data_item in pheno.get("data", {}).values() + } + } + + +BULK_EDIT_COMMON_FIELDNAMES = [ + "UniqueIdentifier", + "Post_publication_description", + "Pre_publication_abbreviation", + "Pre_publication_description", + "Original_description", + "Post_publication_abbreviation", + "PubMed_ID" +] + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/edit-download", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def edit_download_phenotype_data(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + formdata = request.json + with database_connection(app.config["SQL_URI"]) as conn: + samples_list = [ + sample["Name"] for sample in samples_by_species_and_population( + conn, species["SpeciesId"], population["Id"])] + data = ( + process_phenotype_data_for_download(pheno) + for pheno in phenotypes_data_by_ids(conn, tuple({ + "population_id": population["Id"], + "phenoid": row["phenotype_id"], + "xref_id": row["xref_id"] + } for row in formdata))) + + with (tempfile.TemporaryDirectory( + prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir): + filename = Path(tmpdir).joinpath("tempfile.tsv") + with open(filename, mode="w") as outfile: + outfile.write( + "# **DO NOT** delete the 'UniqueIdentifier' row. It is used " + "by the system to identify and edit the correct rows and " + "columns in the database.\n") + outfile.write( + "# The '…_description' fields are useful for you to figure out " + "what row you are working on. Changing any of this fields will " + "also update the database, so do be careful.\n") + outfile.write( + "# Leave a field empty to delete the value in the database.\n") + outfile.write( + "# Any line beginning with a '#' character is considered a " + "comment line. This line, and all the lines above it, are " + "all comment lines. 
Comment lines will be ignored.\n") + writer = csv.DictWriter(outfile, + fieldnames= ( + BULK_EDIT_COMMON_FIELDNAMES + + samples_list), + dialect="excel-tab") + writer.writeheader() + writer.writerows(data) + outfile.flush() + + return send_file( + filename, + mimetype="text/csv", + as_attachment=True, + download_name=secure_filename(f"{dataset['Name']}_data")) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/edit-upload", + methods=["GET", "POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def edit_upload_phenotype_data(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + if request.method == "GET": + return render_template( + "phenotypes/bulk-edit-upload.html", + species=species, + population=population, + dataset=dataset, + activelink="edit-phenotype") + + edit_file = save_file(request.files["file-upload-bulk-edit-upload"], + Path(app.config["UPLOAD_FOLDER"])) + + jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with sqlite3.connection(jobs_db) as conn: + job_id = uuid.uuid4() + job_cmd = [ + sys.executable, "-u", + "-m", "scripts.phenotypes_bulk_edit", + app.config["SQL_URI"], + jobs_db, + str(job_id), + "--log-level", + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower() + ] + app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd) + _job = gnlibs_jobs.launch_job( + gnlibs_jobs.initialise_job(conn, + job_id, + job_cmd, + "phenotype-bulk-edit", + extra_meta = { + "edit-file": str(edit_file), + "species-id": species["SpeciesId"], + "population-id": population["Id"], + "dataset-id": dataset["Id"] + }), + jobs_db, + f"{app.config['UPLOAD_FOLDER']}/job_errors", + worker_manager="gn_libs.jobs.launcher") + + + return redirect(url_for("background-jobs.job_status", + job_id=job_id, + job_type="phenotype-bulk-edit")) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/load-data-success/<uuid:job_id>", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def load_data_success( + species: dict, + population: dict, + dataset: dict, + job_id: uuid.UUID, + **kwargs +):# pylint: disable=[unused-argument] + with (database_connection(app.config["SQL_URI"]) as conn, + sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) + as jobsconn): + try: + gn2_uri = urlparse(app.config["GN2_SERVER_URL"]) + job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True) + app.logger.debug("THE JOB: %s", job) + _xref_ids = (str(item) for item + in json.loads(job["metadata"].get("xref_ids", "[]"))) + _publication = fetch_publication_by_id( + conn, int(job["metadata"].get("publication_id", "0"))) + _search_terms = (item for item in + (str(_publication["PubMed_ID"] or ""), + _publication["Authors"], + (_publication["Title"] or "")) + if item != "") + return render_template("phenotypes/load-phenotypes-success.html", + species=species, + population=population, + dataset=dataset, + job=job, + search_page_uri=urlunparse(ParseResult( + scheme=gn2_uri.scheme, + netloc=gn2_uri.netloc, + path="/search", + 
params="", + query=urlencode({ + "species": species["Name"], + "group": population["Name"], + "type": "Phenotypes", + "dataset": dataset["Name"], + "search_terms_or": ( + # Very long URLs will cause + # errors. + " ".join(_xref_ids) + if len(_xref_ids) <= 100 + else ""), + "search_terms_and": " ".join( + _search_terms).strip(), + "accession_id": "None", + "FormID": "searchResult" + }), + fragment=""))) + except JobNotFound as jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) |