Diffstat (limited to 'uploader/phenotypes')
-rw-r--r--  uploader/phenotypes/__init__.py |    2
-rw-r--r--  uploader/phenotypes/misc.py     |   26
-rw-r--r--  uploader/phenotypes/models.py   |  396
-rw-r--r--  uploader/phenotypes/views.py    | 1200
4 files changed, 1624 insertions, 0 deletions
diff --git a/uploader/phenotypes/__init__.py b/uploader/phenotypes/__init__.py
new file mode 100644
index 0000000..c17d32c
--- /dev/null
+++ b/uploader/phenotypes/__init__.py
@@ -0,0 +1,2 @@
+"""Package for handling ('classical') phenotype data"""
+from .views import phenotypesbp
diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py
new file mode 100644
index 0000000..cbe3b7f
--- /dev/null
+++ b/uploader/phenotypes/misc.py
@@ -0,0 +1,26 @@
+"""Miscellaneous functions handling phenotypes and phenotypes data."""
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def phenotypes_data_differences(
+ filedata: tuple[dict, ...], dbdata: tuple[dict, ...]
+) -> tuple[dict, ...]:
+ """Compute differences between file data and db data"""
+ diff = tuple()
+ for filerow, dbrow in zip(
+ sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])),
+ sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))):
+ for samplename, value in filerow["data"].items():
+ if value != dbrow["data"].get(samplename, {}).get("value"):
+ diff = diff + ({
+ "PhenotypeId": filerow["phenotype_id"],
+ "xref_id": filerow["xref_id"],
+ "DataId": dbrow["DataId"],
+ "StrainId": dbrow["data"].get(samplename, {}).get("StrainId"),
+ "StrainName": samplename,
+ "value": value
+ },)
+
+ return diff
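+
+# A minimal usage sketch (illustrative values only, not real database rows):
+#
+#     filedata = ({"phenotype_id": 4, "xref_id": 10004,
+#                  "data": {"BXD1": 2.5}},)
+#     dbdata = ({"PhenotypeId": 4, "xref_id": 10004, "DataId": 771,
+#                "data": {"BXD1": {"StrainId": 3, "value": 2.4}}},)
+#     phenotypes_data_differences(filedata, dbdata)
+#     # => ({"PhenotypeId": 4, "xref_id": 10004, "DataId": 771,
+#     #      "StrainId": 3, "StrainName": "BXD1", "value": 2.5},)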
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
new file mode 100644
index 0000000..c2aeebf
--- /dev/null
+++ b/uploader/phenotypes/models.py
@@ -0,0 +1,396 @@
+"""Database and utility functions for phenotypes."""
+import logging
+import tempfile
+from pathlib import Path
+from functools import reduce
+from datetime import datetime
+from typing import Optional, Iterable
+
+import MySQLdb as mdb
+from MySQLdb.cursors import Cursor, DictCursor
+
+from functional_tools import take
+from gn_libs.mysqldb import debug_query
+
+logger = logging.getLogger(__name__)
+
+
+__PHENO_DATA_TABLES__ = {
+ "PublishData": {
+ "table": "PublishData", "valueCol": "value", "DataIdCol": "Id"},
+ "PublishSE": {
+ "table": "PublishSE", "valueCol": "error", "DataIdCol": "DataId"},
+ "NStrain": {
+ "table": "NStrain", "valueCol": "count", "DataIdCol": "DataId"}
+}
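+
+# The mapping above drives 'save_phenotypes_data' and
+# 'quick_save_phenotypes_data' below: the "PublishSE" entry, for example,
+# makes them write rows of the form (DataId, StrainId, error) into the
+# PublishSE table. "PublishData" is the odd one out, naming its data-id
+# column 'Id' rather than 'DataId'.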
+
+
+def datasets_by_population(
+ conn: mdb.Connection,
+ species_id: int,
+ population_id: int
+) -> tuple[dict, ...]:
+ """Retrieve all of a population's phenotype studies."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT s.SpeciesId, pf.* FROM Species AS s "
+ "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId "
+ "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId "
+ "WHERE s.Id=%s AND iset.Id=%s;",
+ (species_id, population_id))
+ return tuple(dict(row) for row in cursor.fetchall())
+
+
+def dataset_by_id(conn: mdb.Connection,
+ species_id: int,
+ population_id: int,
+ dataset_id: int) -> dict:
+ """Fetch dataset details by identifier"""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT Species.SpeciesId, PublishFreeze.* FROM Species "
+ "INNER JOIN InbredSet ON Species.Id=InbredSet.SpeciesId "
+ "INNER JOIN PublishFreeze ON InbredSet.Id=PublishFreeze.InbredSetId "
+ "WHERE Species.Id=%s AND InbredSet.Id=%s AND PublishFreeze.Id=%s",
+ (species_id, population_id, dataset_id))
+ return dict(cursor.fetchone())
+
+
+def phenotypes_count(conn: mdb.Connection,
+ population_id: int,
+ dataset_id: int) -> int:
+ """Count the number of phenotypes in the dataset."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT COUNT(*) AS total_phenos FROM Phenotype AS pheno "
+ "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+ "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId "
+ "WHERE pxr.InbredSetId=%s AND pf.Id=%s",
+ (population_id, dataset_id))
+ return int(cursor.fetchone()["total_phenos"])
+
+
+def phenotype_publication_data(conn, phenotype_id) -> Optional[dict]:
+ """Retrieve the publication data for a phenotype if it exists."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT DISTINCT pxr.PhenotypeId, pub.* FROM PublishXRef AS pxr "
+ "INNER JOIN Publication as pub ON pxr.PublicationId=pub.Id "
+ "WHERE pxr.PhenotypeId=%s",
+ (phenotype_id,))
+ res = cursor.fetchone()
+ if res is None:
+ return res
+ return dict(res)
+
+
+def dataset_phenotypes(conn: mdb.Connection,
+ population_id: int,
+ dataset_id: int,
+ offset: int = 0,
+ limit: Optional[int] = None) -> tuple[dict, ...]:
+ """Fetch the actual phenotypes."""
+    _query = (
+        "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, ist.InbredSetCode "
+        "FROM Phenotype AS pheno "
+ "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+ "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId "
+ "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id "
+ "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + (
+ f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "")
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(_query, (population_id, dataset_id))
+ debug_query(cursor, logger)
+ return tuple(dict(row) for row in cursor.fetchall())
+
+
+def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids):
+ """Fetch standard-error values (if they exist) for a phenotype."""
+ paramstr = ", ".join(["(%s, %s)"] * len(dataids_and_strainids))
+ flat = tuple(item for sublist in dataids_and_strainids for item in sublist)
+ cursor.execute("SELECT * FROM PublishSE WHERE (DataId, StrainId) IN "
+ f"({paramstr})",
+ flat)
+ debug_query(cursor, logger)
+ _se = {
+ (row["DataId"], row["StrainId"]): {
+ "DataId": row["DataId"],
+ "StrainId": row["StrainId"],
+ "error": row["error"]
+ }
+ for row in cursor.fetchall()
+ }
+
+ cursor.execute("SELECT * FROM NStrain WHERE (DataId, StrainId) IN "
+ f"({paramstr})",
+ flat)
+ debug_query(cursor, logger)
+ _n = {
+ (row["DataId"], row["StrainId"]): {
+ "DataId": row["DataId"],
+ "StrainId": row["StrainId"],
+ "count": row["count"]
+ }
+ for row in cursor.fetchall()
+ }
+
+ keys = set(tuple(_se.keys()) + tuple(_n.keys()))
+ return {
+ key: {"xref_id": xref_id, **_se.get(key,{}), **_n.get(key,{})}
+ for key in keys
+ }
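+
+# For example, with dataids_and_strainids = ((771, 3), (771, 4)), 'paramstr'
+# above expands to "(%s, %s), (%s, %s)" and 'flat' to (771, 3, 771, 4),
+# yielding:
+#     SELECT * FROM PublishSE WHERE (DataId, StrainId) IN ((771, 3), (771, 4))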
+
+def __organise_by_phenotype__(pheno, row):
+ """Organise disparate data rows into phenotype 'objects'."""
+ _pheno = pheno.get(row["Id"])
+ return {
+ **pheno,
+ row["Id"]: {
+ "Id": row["Id"],
+ "Pre_publication_description": row["Pre_publication_description"],
+ "Post_publication_description": row["Post_publication_description"],
+ "Original_description": row["Original_description"],
+ "Units": row["Units"],
+ "Pre_publication_abbreviation": row["Pre_publication_abbreviation"],
+ "Post_publication_abbreviation": row["Post_publication_abbreviation"],
+ "xref_id": row["pxr.Id"],
+ "DataId": row["DataId"],
+ "data": {
+ **(_pheno["data"] if bool(_pheno) else {}),
+ (row["DataId"], row["StrainId"]): {
+ "DataId": row["DataId"],
+ "StrainId": row["StrainId"],
+ "mean": row["mean"],
+ "Locus": row["Locus"],
+ "LRS": row["LRS"],
+ "additive": row["additive"],
+ "Sequence": row["Sequence"],
+ "comments": row["comments"],
+ "value": row["value"],
+ "StrainName": row["Name"],
+ "StrainName2": row["Name2"],
+ "StrainSymbol": row["Symbol"],
+ "StrainAlias": row["Alias"]
+ }
+ }
+ }
+ }
+
+
+def __merge_pheno_data_and_se__(data, sedata) -> dict:
+ """Merge phenotype data with the standard errors."""
+ return {
+ key: {**value, **sedata.get(key, {})}
+ for key, value in data.items()
+ }
+
+
+def phenotype_by_id(
+ conn: mdb.Connection,
+ species_id: int,
+ population_id: int,
+ dataset_id: int,
+ xref_id
+) -> Optional[dict]:
+ """Fetch a specific phenotype."""
+ _dataquery = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode "
+ "FROM Phenotype AS pheno "
+ "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+ "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id "
+ "INNER JOIN Strain AS str ON pd.StrainId=str.Id "
+ "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
+ "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
+ "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
+ "WHERE "
+ "(str.SpeciesId, pxr.InbredSetId, pf.Id, pxr.Id)=(%s, %s, %s, %s)")
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(_dataquery,
+ (species_id, population_id, dataset_id, xref_id))
+ _pheno: dict = reduce(__organise_by_phenotype__, cursor.fetchall(), {})
+ if bool(_pheno) and len(_pheno.keys()) == 1:
+ _pheno = tuple(_pheno.values())[0]
+ return {
+ **_pheno,
+ "data": tuple(__merge_pheno_data_and_se__(
+ _pheno["data"],
+ __phenotype_se__(
+ cursor, xref_id, tuple(_pheno["data"].keys()))
+ ).values())
+ }
+ if bool(_pheno) and len(_pheno.keys()) > 1:
+ raise Exception(
+ "We found more than one phenotype with the same identifier!")
+
+ return None
+
+
+def phenotypes_data(conn: mdb.Connection,
+ population_id: int,
+ dataset_id: int,
+ offset: int = 0,
+ limit: Optional[int] = None) -> tuple[dict, ...]:
+ """Fetch the data for the phenotypes."""
+    # Phenotype -> PublishXRef -> PublishData -> Strain -> StrainXRef -> PublishFreeze
+ _query = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode "
+ "FROM Phenotype AS pheno "
+ "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+ "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id "
+ "INNER JOIN Strain AS str ON pd.StrainId=str.Id "
+ "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
+ "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
+ "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
+ "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + (
+ f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "")
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(_query, (population_id, dataset_id))
+ debug_query(cursor, logger)
+ return tuple(dict(row) for row in cursor.fetchall())
+
+
+def save_new_dataset(cursor: Cursor,
+ population_id: int,
+ dataset_name: str,
+ dataset_fullname: str,
+ dataset_shortname: str) -> dict:
+ """Create a new phenotype dataset."""
+ params = {
+ "population_id": population_id,
+ "dataset_name": dataset_name,
+ "dataset_fullname": dataset_fullname,
+ "dataset_shortname": dataset_shortname,
+ "created": datetime.now().date().isoformat(),
+ "public": 2,
+ "confidentiality": 0,
+ "users": None
+ }
+ cursor.execute(
+ "INSERT INTO PublishFreeze(Name, FullName, ShortName, CreateTime, "
+ "public, InbredSetId, confidentiality, AuthorisedUsers) "
+ "VALUES(%(dataset_name)s, %(dataset_fullname)s, %(dataset_shortname)s, "
+ "%(created)s, %(public)s, %(population_id)s, %(confidentiality)s, "
+ "%(users)s)",
+ params)
+ debug_query(cursor, logger)
+ return {**params, "Id": cursor.lastrowid}
+
+
+def phenotypes_data_by_ids(
+ conn: mdb.Connection,
+        inbred_pheno_xref: tuple[dict, ...]
+) -> tuple[dict, ...]:
+ """Fetch all phenotype data, filtered by the `inbred_pheno_xref` mapping."""
+ _paramstr = ",".join(["(%s, %s, %s)"] * len(inbred_pheno_xref))
+ _query = ("SELECT "
+ "pub.PubMed_ID, pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode "
+ "FROM Publication AS pub "
+ "RIGHT JOIN PublishXRef AS pxr0 ON pub.Id=pxr0.PublicationId "
+ "INNER JOIN Phenotype AS pheno ON pxr0.PhenotypeId=pheno.id "
+ "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+ "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id "
+ "INNER JOIN Strain AS str ON pd.StrainId=str.Id "
+ "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
+ "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
+ "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
+ f"WHERE (pxr.InbredSetId, pheno.Id, pxr.Id) IN ({_paramstr}) "
+ "ORDER BY pheno.Id")
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(_query, tuple(item for row in inbred_pheno_xref
+ for item in (row["population_id"],
+ row["phenoid"],
+ row["xref_id"])))
+ debug_query(cursor, logger)
+ return tuple(
+ reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values())
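+
+# 'inbred_pheno_xref' is assumed to be a sequence of mappings, one triplet per
+# phenotype to fetch, e.g.
+#     ({"population_id": 1, "phenoid": 4, "xref_id": 10004},
+#      {"population_id": 1, "phenoid": 5, "xref_id": 10005})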
+
+
+def create_new_phenotypes(conn: mdb.Connection,
+ phenotypes: Iterable[dict]) -> tuple[dict, ...]:
+ """Add entirely new phenotypes to the database."""
+ _phenos = tuple()
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ while True:
+ batch = take(phenotypes, 1000)
+ if len(batch) == 0:
+ break
+
+ cursor.executemany(
+ ("INSERT INTO "
+ "Phenotype(Pre_publication_description, Original_description, Units, Authorized_Users) "
+ "VALUES (%s, %s, %s, 'robwilliams')"),
+ tuple((row["id"], row["description"], row["units"])
+ for row in batch))
+ paramstr = ", ".join(["%s"] * len(batch))
+ cursor.execute(
+ "SELECT * FROM Phenotype WHERE Pre_publication_description IN "
+ f"({paramstr})",
+ tuple(item["id"] for item in batch))
+ _phenos = _phenos + tuple({
+ "phenotype_id": row["Id"],
+ "id": row["Pre_publication_description"],
+ "description": row["Original_description"],
+ "units": row["Units"]
+ } for row in cursor.fetchall())
+
+ return _phenos
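+
+# Each input row is assumed to take the form
+#     {"id": "pheno10001", "description": "Body weight [g]", "units": "g"}
+# with "id" stored as Pre_publication_description, which doubles as the
+# lookup key when fetching the freshly-inserted records back.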
+
+
+def save_phenotypes_data(
+ conn: mdb.Connection,
+ table: str,
+ data: Iterable[dict]
+) -> int:
+ """Save new phenotypes data into the database."""
+ _table_details = __PHENO_DATA_TABLES__[table]
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ _count = 0
+ while True:
+ batch = take(data, 100000)
+ if len(batch) == 0:
+ logger.warning("Got an empty batch. This needs investigation.")
+ break
+
+ logger.debug("Saving batch of %s items.", len(batch))
+ cursor.executemany(
+ (f"INSERT INTO {_table_details['table']}"
+ f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) "
+ "VALUES "
+ f"(%(data_id)s, %(sample_id)s, %(value)s) "),
+ tuple(batch))
+ debug_query(cursor, logger)
+ _count = _count + len(batch)
+
+ logger.debug("Saved a total of %s data rows", _count)
+ return _count
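+
+# Each data item is expected to match the named placeholders above, e.g.
+#     {"data_id": 771, "sample_id": 3, "value": 2.5}
+# with 'table' being one of the keys in __PHENO_DATA_TABLES__.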
+
+
+def quick_save_phenotypes_data(
+ conn: mdb.Connection,
+ table: str,
+ dataitems: Iterable[dict],
+ tmpdir: Path
+) -> int:
+ """Save data items to the database, but using """
+ _table_details = __PHENO_DATA_TABLES__[table]
+ with (tempfile.NamedTemporaryFile(
+ prefix=f"{table}_data", mode="wt", dir=tmpdir) as tmpfile,
+ conn.cursor(cursorclass=DictCursor) as cursor):
+ _count = 0
+ logger.debug("Write data rows to text file.")
+ for row in dataitems:
+ tmpfile.write(
+ f'{row["data_id"]}\t{row["sample_id"]}\t{row["value"]}\n')
+ _count = _count + 1
+ tmpfile.flush()
+
+ logger.debug("Load text file into database (table: %s)",
+ _table_details["table"])
+ cursor.execute(
+ f"LOAD DATA LOCAL INFILE '{tmpfile.name}' "
+ f"INTO TABLE {_table_details['table']} "
+ "("
+ f"{_table_details['DataIdCol']}, "
+ "StrainId, "
+ f"{_table_details['valueCol']}"
+ ")")
+ debug_query(cursor, logger)
+ return _count
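+
+# NOTE: LOAD DATA LOCAL INFILE only works when 'local_infile' is enabled on
+# the MySQL server and the client connection was opened with local-infile
+# support (e.g. MySQLdb.connect(..., local_infile=1)); otherwise the server
+# rejects the statement.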
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
new file mode 100644
index 0000000..bc15f2d
--- /dev/null
+++ b/uploader/phenotypes/views.py
@@ -0,0 +1,1200 @@
+"""Views handling ('classical') phenotypes."""
+import sys
+import csv
+import uuid
+import json
+import logging
+import tempfile
+from typing import Any
+from pathlib import Path
+from zipfile import ZipFile
+from functools import wraps, reduce
+from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode
+
+import datetime
+from datetime import timedelta
+
+from redis import Redis
+from pymonad.either import Left
+from requests.models import Response
+from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
+
+from gn_libs import sqlite3
+from gn_libs import jobs as gnlibs_jobs
+from gn_libs.jobs.jobs import JobNotFound
+from gn_libs.mysqldb import database_connection
+from gn_libs import monadic_requests as mrequests
+
+from authlib.jose import jwt
+from flask import (flash,
+ request,
+ url_for,
+ jsonify,
+ redirect,
+ Blueprint,
+ send_file,
+ current_app as app)
+
+# from r_qtl import r_qtl2 as rqtl2
+from r_qtl import r_qtl2_qc as rqc
+from r_qtl import exceptions as rqe
+
+
+from uploader import jobs
+from uploader import session
+from uploader.files import save_file#, fullpath
+from uploader.ui import make_template_renderer
+from uploader.oauth2.client import oauth2_post
+from uploader.authorisation import require_login
+from uploader.oauth2 import jwks, client as oauth2client
+from uploader.route_utils import generic_select_population
+from uploader.datautils import safe_int, enumerate_sequence
+from uploader.species.models import all_species, species_by_id
+from uploader.monadic_requests import make_either_error_handler
+from uploader.publications.models import fetch_publication_by_id
+from uploader.request_checks import with_species, with_population
+from uploader.samples.models import samples_by_species_and_population
+from uploader.input_validation import (encode_errors,
+ decode_errors,
+ is_valid_representative_name)
+
+from .models import (dataset_by_id,
+ phenotype_by_id,
+ phenotypes_count,
+ save_new_dataset,
+ dataset_phenotypes,
+ datasets_by_population,
+ phenotypes_data_by_ids,
+ phenotype_publication_data)
+
+phenotypesbp = Blueprint("phenotypes", __name__)
+render_template = make_template_renderer("phenotypes")
+
+_FAMILIES_WITH_SE_AND_N_ = (
+ "Reference Populations (replicate average, SE, N)",)
+
+@phenotypesbp.route("/phenotypes", methods=["GET"])
+@require_login
+def index():
+ """Direct entry-point for phenotypes data handling."""
+ with database_connection(app.config["SQL_URI"]) as conn:
+ if not bool(request.args.get("species_id")):
+ return render_template("phenotypes/index.html",
+ species=all_species(conn),
+ activelink="phenotypes")
+
+ species_id = request.args.get("species_id")
+ if species_id == "CREATE-SPECIES":
+ return redirect(url_for(
+ "species.create_species",
+ return_to="species.populations.phenotypes.select_population"))
+
+ species = species_by_id(conn, species_id)
+ if not bool(species):
+ flash("No such species!", "alert-danger")
+ return redirect(url_for("species.populations.phenotypes.index"))
+ return redirect(url_for("species.populations.phenotypes.select_population",
+ species_id=species["SpeciesId"]))
+
+
+@phenotypesbp.route("<int:species_id>/phenotypes/select-population",
+ methods=["GET"])
+@require_login
+@with_species(redirect_uri="species.populations.phenotypes.index")
+def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument]
+ """Select the population for your phenotypes."""
+ return generic_select_population(
+ species,
+ "phenotypes/select-population.html",
+ request.args.get("population_id") or "",
+ "species.populations.phenotypes.select_population",
+ "species.populations.phenotypes.list_datasets",
+ "phenotypes",
+ "No such population found!")
+
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets",
+ methods=["GET"])
+@require_login
+@with_population(species_redirect_uri="species.populations.phenotypes.index",
+ redirect_uri="species.populations.phenotypes.select_population")
+def list_datasets(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
+ """List available phenotype datasets."""
+ with database_connection(app.config["SQL_URI"]) as conn:
+ datasets = datasets_by_population(
+ conn, species["SpeciesId"], population["Id"])
+ if len(datasets) == 1:
+ return redirect(url_for(
+ "species.populations.phenotypes.view_dataset",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=datasets[0]["Id"]))
+ return render_template("phenotypes/list-datasets.html",
+ species=species,
+ population=population,
+ datasets=datasets,
+ activelink="list-datasets")
+
+
+def with_dataset(
+ species_redirect_uri: str,
+ population_redirect_uri: str,
+ redirect_uri: str
+):
+ """Ensure the dataset actually exists."""
+ def __decorator__(func):
+ @wraps(func)
+ @with_population(species_redirect_uri, population_redirect_uri)
+ def __with_dataset__(**kwargs):
+            _spcid = int(kwargs["species_id"])
+            _popid = int(kwargs["population_id"])
+            select_dataset_uri = redirect(url_for(
+                redirect_uri, species_id=_spcid, population_id=_popid))
+            try:
+                _dsetid = int(kwargs.get("dataset_id"))
+ if not bool(_dsetid):
+ flash("You need to select a valid 'dataset_id' value.",
+ "alert-danger")
+ return select_dataset_uri
+ with database_connection(app.config["SQL_URI"]) as conn:
+ dataset = dataset_by_id(conn, _spcid, _popid, _dsetid)
+ if not bool(dataset):
+ flash("You must select a valid dataset.",
+ "alert-danger")
+ return select_dataset_uri
+            except (TypeError, ValueError) as _verr:
+ app.logger.debug(
+ "Exception converting 'dataset_id' to integer: %s",
+ kwargs.get("dataset_id"),
+ exc_info=True)
+ flash("Expected 'dataset_id' value to be an integer."
+ "alert-danger")
+ return select_dataset_uri
+ return func(dataset=dataset, **kwargs)
+ return __with_dataset__
+ return __decorator__
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/view",
+ methods=["GET"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def view_dataset(# pylint: disable=[unused-argument]
+ species: dict, population: dict, dataset: dict, **kwargs):
+ """View a specific dataset"""
+ with database_connection(app.config["SQL_URI"]) as conn:
+ dataset = dataset_by_id(
+ conn, species["SpeciesId"], population["Id"], dataset["Id"])
+ if not bool(dataset):
+ flash("Could not find such a phenotype dataset!", "alert-danger")
+ return redirect(url_for(
+ "species.populations.phenotypes.list_datasets",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"]))
+
+ start_at = max(safe_int(request.args.get("start_at") or 0), 0)
+ count = int(request.args.get("count") or 20)
+ return render_template("phenotypes/view-dataset.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ phenotype_count=phenotypes_count(
+ conn, population["Id"], dataset["Id"]),
+ phenotypes=enumerate_sequence(
+ dataset_phenotypes(
+ conn,
+ population["Id"],
+ dataset["Id"])),
+ start_from=start_at,
+ count=count,
+ activelink="view-dataset")
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/phenotype/<xref_id>",
+ methods=["GET"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def view_phenotype(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ xref_id: int,
+ **kwargs
+):
+ """View an individual phenotype from the dataset."""
+ def __render__(privileges):
+ phenotype = phenotype_by_id(conn,
+ species["SpeciesId"],
+ population["Id"],
+ dataset["Id"],
+ xref_id)
+ def __non_empty__(value) -> bool:
+ if isinstance(value, str):
+ return value.strip() != ""
+ return bool(value)
+
+ return render_template(
+ "phenotypes/view-phenotype.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ xref_id=xref_id,
+ phenotype=phenotype,
+ has_se=any(bool(item.get("error")) for item in phenotype["data"]),
+ publish_data={
+ key.replace("_", " "): val
+ for key,val in
+ (phenotype_publication_data(conn, phenotype["Id"]) or {}).items()
+ if (key in ("PubMed_ID", "Authors", "Title", "Journal")
+ and __non_empty__(val))
+ },
+ privileges=(privileges
+ ### For demo! Do not commit this part
+ + ("group:resource:edit-resource",
+ "group:resource:delete-resource",)
+ ### END: For demo! Do not commit this part
+ ),
+ activelink="view-phenotype")
+
+ def __fail__(error):
+ if isinstance(error, Response) and error.json() == "No linked resource!":
+ return __render__(tuple())
+ return make_either_error_handler(
+ "There was an error fetching the roles and privileges.")(error)
+
+ with database_connection(app.config["SQL_URI"]) as conn:
+ return oauth2_post(
+ "/auth/resource/phenotypes/individual/linked-resource",
+ json={
+ "species_id": species["SpeciesId"],
+ "population_id": population["Id"],
+ "dataset_id": dataset["Id"],
+ "xref_id": xref_id
+ }
+ ).then(
+ lambda resource: tuple(
+ privilege["privilege_id"] for role in resource["roles"]
+ for privilege in role["privileges"])
+ ).then(__render__).either(__fail__, lambda resp: resp)
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets/create",
+ methods=["GET", "POST"])
+@require_login
+@with_population(
+ species_redirect_uri="species.populations.phenotypes.index",
+ redirect_uri="species.populations.phenotypes.select_population")
+def create_dataset(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
+ """Create a new phenotype dataset."""
+ with (database_connection(app.config["SQL_URI"]) as conn,
+ conn.cursor(cursorclass=DictCursor) as cursor):
+ if request.method == "GET":
+ return render_template("phenotypes/create-dataset.html",
+ activelink="create-dataset",
+ species=species,
+ population=population,
+ **decode_errors(
+ request.args.get("error_values", "")))
+
+ form = request.form
+ _errors: tuple[tuple[str, str], ...] = tuple()
+ if not is_valid_representative_name(
+ (form.get("dataset-name") or "").strip()):
+ _errors = _errors + (("dataset-name", "Invalid dataset name."),)
+
+ if not bool((form.get("dataset-fullname") or "").strip()):
+ _errors = _errors + (("dataset-fullname",
+ "You must provide a value for 'Full Name'."),)
+
+        if _errors:
+ return redirect(url_for(
+ "species.populations.phenotypes.create_dataset",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ error_values=encode_errors(_errors, form)))
+
+ dataset_shortname = (
+ form["dataset-shortname"] or form["dataset-name"]).strip()
+ _pheno_dataset = save_new_dataset(
+ cursor,
+ population["Id"],
+ form["dataset-name"].strip(),
+ form["dataset-fullname"].strip(),
+ dataset_shortname)
+ return redirect(url_for("species.populations.phenotypes.list_datasets",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"]))
+
+
+def process_phenotypes_rqtl2_bundle(error_uri):
+ """Process phenotypes from the uploaded R/qtl2 bundle."""
+ _redisuri = app.config["REDIS_URL"]
+ _sqluri = app.config["SQL_URI"]
+ try:
+ ## Handle huge files here...
+ phenobundle = save_file(request.files["phenotypes-bundle"],
+ Path(app.config["UPLOAD_FOLDER"]))
+ rqc.validate_bundle(phenobundle)
+ return phenobundle
+ except AssertionError as _aerr:
+ app.logger.debug("File upload error!", exc_info=True)
+ flash("Expected a zipped bundle of files with phenotypes' "
+ "information.",
+ "alert-danger")
+ return error_uri
+ except rqe.RQTLError as rqtlerr:
+ app.logger.debug("Bundle validation error!", exc_info=True)
+ flash("R/qtl2 Error: " + " ".join(rqtlerr.args), "alert-danger")
+ return error_uri
+
+
+def process_phenotypes_individual_files(error_uri):
+ """Process the uploaded individual files."""
+ form = request.form
+ cdata = {
+ "sep": form["file-separator"],
+ "comment.char": form["file-comment-character"],
+ "na.strings": form["file-na"].split(" "),
+ }
+ bundlepath = Path(app.config["UPLOAD_FOLDER"],
+ f"{str(uuid.uuid4()).replace('-', '')}.zip")
+ with ZipFile(bundlepath,mode="w") as zfile:
+ for rqtlkey, formkey in (("phenocovar", "phenotype-descriptions"),
+ ("pheno", "phenotype-data"),
+ ("phenose", "phenotype-se"),
+ ("phenonum", "phenotype-n")):
+ cdata[f"{rqtlkey}_transposed"] = (
+ (form.get(f"{formkey}-transposed") or "off") == "on")
+
+ if form.get("resumable-upload", False):
+ # Chunked upload of large files was used
+ filedata = json.loads(form[formkey])
+ zfile.write(
+ Path(app.config["UPLOAD_FOLDER"], filedata["uploaded-file"]),
+ arcname=filedata["original-name"])
+ cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]]
+ else:
+ # TODO: Check this path: fix any bugs.
+                _sentfile = request.files.get(formkey)
+ if not bool(_sentfile):
+ flash(f"Expected file ('{formkey}') was not provided.",
+ "alert-danger")
+ return error_uri
+
+ filepath = save_file(
+ _sentfile, Path(app.config["UPLOAD_FOLDER"]), hashed=False)
+ zfile.write(
+ Path(app.config["UPLOAD_FOLDER"], filepath),
+ arcname=filepath.name)
+ cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name]
+
+ zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2))
+
+ return bundlepath
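+
+# For illustration (hypothetical filenames), the generated control_data.json
+# would look roughly like:
+#     {"sep": ",", "comment.char": "#", "na.strings": ["NA"],
+#      "phenocovar_transposed": false, "phenocovar": ["descriptions.csv"],
+#      "pheno_transposed": false, "pheno": ["data.csv"],
+#      "phenose_transposed": false, "phenose": ["se.csv"],
+#      "phenonum_transposed": false, "phenonum": ["n.csv"]}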
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/add-phenotypes",
+ methods=["GET", "POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# pylint: disable=[unused-argument, too-many-locals]
+ """Add one or more phenotypes to the dataset."""
+ use_bundle = request.args.get("use_bundle", "").lower() == "true"
+ add_phenos_uri = redirect(url_for(
+ "species.populations.phenotypes.add_phenotypes",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=dataset["Id"]))
+ _redisuri = app.config["REDIS_URL"]
+ _sqluri = app.config["SQL_URI"]
+ with (Redis.from_url(_redisuri, decode_responses=True) as rconn,
+ # database_connection(_sqluri) as conn,
+ # conn.cursor(cursorclass=DictCursor) as cursor
+ ):
+ if request.method == "GET":
+ today = datetime.date.today()
+ return render_template(
+ ("phenotypes/add-phenotypes-with-rqtl2-bundle.html"
+ if use_bundle else "phenotypes/add-phenotypes-raw-files.html"),
+ species=species,
+ population=population,
+ dataset=dataset,
+ monthnames=(
+ "January", "February", "March", "April",
+ "May", "June", "July", "August",
+ "September", "October", "November",
+ "December"),
+ current_month=today.strftime("%B"),
+ current_year=int(today.strftime("%Y")),
+ families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
+ use_bundle=use_bundle,
+ activelink="add-phenotypes")
+
+ phenobundle = (process_phenotypes_rqtl2_bundle(add_phenos_uri)
+ if use_bundle else
+ process_phenotypes_individual_files(add_phenos_uri))
+
+ _jobid = uuid.uuid4()
+ _namespace = jobs.jobsnamespace()
+ _ttl_seconds = app.config["JOBS_TTL_SECONDS"]
+ _job = jobs.launch_job(
+ jobs.initialise_job(
+ rconn,
+ _namespace,
+ str(_jobid),
+ [sys.executable, "-m", "scripts.rqtl2.phenotypes_qc", _sqluri,
+ _redisuri, _namespace, str(_jobid), str(species["SpeciesId"]),
+ str(population["Id"]),
+ # str(dataset["Id"]),
+ str(phenobundle),
+ "--loglevel",
+ logging.getLevelName(
+ app.logger.getEffectiveLevel()
+ ).lower(),
+ "--redisexpiry",
+ str(_ttl_seconds)], "phenotype_qc", _ttl_seconds,
+ {"job-metadata": json.dumps({
+ "speciesid": species["SpeciesId"],
+ "populationid": population["Id"],
+ "datasetid": dataset["Id"],
+ "bundle": str(phenobundle.absolute()),
+ **({"publicationid": request.form["publication-id"]}
+ if request.form.get("publication-id") else {})})}),
+ _redisuri,
+ f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+ app.logger.debug("JOB DETAILS: %s", _job)
+ jobstatusuri = url_for("species.populations.phenotypes.job_status",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=dataset["Id"],
+ job_id=str(_job["jobid"]))
+ return ((jsonify({
+ "redirect-to": jobstatusuri,
+ "statuscode": 200,
+ "message": ("Follow the 'redirect-to' URI to see the state "
+ "of the quality-control job started for your "
+ "uploaded files.")
+ }), 200)
+ if request.form.get("resumable-upload", False) else
+ redirect(jobstatusuri))
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/job/<uuid:job_id>",
+ methods=["GET"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def job_status(
+ species: dict,
+ population: dict,
+ dataset: dict,
+ job_id: uuid.UUID,
+ **kwargs
+):# pylint: disable=[unused-argument]
+ """Retrieve current status of a particular phenotype QC job."""
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+ try:
+ job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
+ except jobs.JobNotFound as _jnf:
+ job = None
+ return render_template("phenotypes/job-status.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ job_id=job_id,
+ job=job,
+                               errors=(jobs.job_errors(
+                                   rconn, jobs.jobsnamespace(), job['jobid'])
+                                       if job else tuple()),
+                               metadata=(jobs.job_files_metadata(
+                                   rconn, jobs.jobsnamespace(), job['jobid'])
+                                         if job else {}),
+ activelink="add-phenotypes")
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/job/<uuid:job_id>/review",
+ methods=["GET"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def review_job_data(
+ species: dict,
+ population: dict,
+ dataset: dict,
+ job_id: uuid.UUID,
+ **kwargs
+):# pylint: disable=[unused-argument]
+ """Review data one more time before entering it into the database."""
+ with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+ database_connection(app.config["SQL_URI"]) as conn):
+ try:
+ job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
+ except jobs.JobNotFound as _jnf:
+ job = None
+
+ def __metadata_by_type__(by_type, item):
+ filetype = item[1]["filetype"]
+ return {
+ **by_type,
+ filetype: (by_type.get(filetype, tuple())
+ + ({"filename": item[0], **item[1]},))
+ }
+ metadata: dict[str, Any] = reduce(
+ __metadata_by_type__,
+ (jobs.job_files_metadata(
+ rconn, jobs.jobsnamespace(), job['jobid'])
+ if job else {}).items(),
+ {})
+
+ def __desc__(filetype):
+ match filetype:
+ case "phenocovar":
+ desc = "phenotypes"
+ case "pheno":
+ desc = "phenotypes data"
+ case "phenose":
+ desc = "phenotypes standard-errors"
+ case "phenonum":
+ desc = "phenotypes samples"
+ case _:
+ desc = f"unknown file type '{filetype}'."
+
+ return desc
+
+ def __summarise__(filetype, files):
+ return {
+ "filetype": filetype,
+ "number-of-files": len(files),
+ "total-data-rows": sum(
+ int(afile["linecount"]) - 1 for afile in files),
+ "description": __desc__(filetype)
+ }
+
+ summary = {
+ filetype: __summarise__(filetype, meta)
+ for filetype,meta in metadata.items()
+ }
+        _job_metadata = json.loads(job["job-metadata"]) if job else {}
+ return render_template("phenotypes/review-job-data.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ job_id=job_id,
+ job=job,
+ summary=summary,
+ publication=(
+ fetch_publication_by_id(
+ conn, int(_job_metadata["publicationid"]))
+ if _job_metadata.get("publicationid")
+ else None),
+ activelink="add-phenotypes")
+
+
+def load_phenotypes_success_handler(job):
+ """Handle loading new phenotypes into the database successfully."""
+ return redirect(url_for(
+ "species.populations.phenotypes.load_data_success",
+ species_id=job["metadata"]["species_id"],
+ population_id=job["metadata"]["population_id"],
+ dataset_id=job["metadata"]["dataset_id"],
+ job_id=job["job_id"]))
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/load-data-to-database",
+ methods=["POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def load_data_to_database(
+ species: dict,
+ population: dict,
+ dataset: dict,
+ **kwargs
+):# pylint: disable=[unused-argument]
+ """Load the data from the given QC job into the database."""
+ jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+ with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+ sqlite3.connection(jobs_db) as conn):
+ qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"])
+ _meta = json.loads(qc_job["job-metadata"])
+ load_job_id = uuid.uuid4()
+ _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()
+ command = [
+ sys.executable,
+ "-u",
+ "-m",
+ "scripts.load_phenotypes_to_db",
+ app.config["SQL_URI"],
+ jobs_db,
+ str(load_job_id),
+ "--log-level",
+ _loglevel
+ ]
+
+ def __handle_error__(resp):
+ return render_template("http-error.html", *resp.json())
+
+ def __handle_success__(load_job):
+ app.logger.debug("The phenotypes loading job: %s", load_job)
+ return redirect(url_for(
+ "background-jobs.job_status", job_id=load_job["job_id"]))
+
+ issued = datetime.datetime.now()
+ jwtkey = jwks.newest_jwk_with_rotation(
+ jwks.jwks_directory(app, "UPLOADER_SECRETS"),
+ int(app.config["JWKS_ROTATION_AGE_DAYS"]))
+
+ return mrequests.post(
+ urljoin(oauth2client.authserver_uri(), "auth/token"),
+ json={
+ "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
+ "scope": oauth2client.SCOPE,
+ "assertion": jwt.encode(
+ header={
+ "alg": "RS256",
+ "typ": "JWT",
+ "kid": jwtkey.as_dict()["kid"]
+ },
+ payload={
+ "iss": str(oauth2client.oauth2_clientid()),
+ "sub": str(session.user_details()["user_id"]),
+ "aud": urljoin(oauth2client.authserver_uri(),
+ "auth/token"),
+ # TODO: Update expiry time once fix is implemented in
+ # auth server.
+ "exp": (issued + timedelta(minutes=5)).timestamp(),
+ "nbf": int(issued.timestamp()),
+ "iat": int(issued.timestamp()),
+ "jti": str(uuid.uuid4())
+ },
+ key=jwtkey).decode("utf8"),
+ "client_id": oauth2client.oauth2_clientid()
+ }
+ ).then(
+ lambda token: gnlibs_jobs.initialise_job(
+ conn,
+ load_job_id,
+ command,
+ "load-new-phenotypes-data",
+ extra_meta={
+ "species_id": species["SpeciesId"],
+ "population_id": population["Id"],
+ "dataset_id": dataset["Id"],
+ "bundle_file": _meta["bundle"],
+ "publication_id": _meta["publicationid"],
+ "authserver": oauth2client.authserver_uri(),
+ "token": token["access_token"],
+ "success_handler": (
+ "uploader.phenotypes.views"
+ ".load_phenotypes_success_handler")
+ })
+ ).then(
+ lambda job: gnlibs_jobs.launch_job(
+ job,
+ jobs_db,
+ Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"),
+ worker_manager="gn_libs.jobs.launcher",
+ loglevel=_loglevel)
+ ).either(__handle_error__, __handle_success__)
+
+
+def update_phenotype_metadata(conn, metadata: dict):
+ """Update a phenotype's basic metadata values."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute("SELECT * FROM Phenotype WHERE Id=%(phenotype-id)s",
+ metadata)
+ res = {
+ **{
+ _key: _val for _key,_val in {
+ key.lower().replace("_", "-"): value
+ for key, value in (cursor.fetchone() or {}).items()
+ }.items()
+ if _key in metadata.keys()
+ },
+ "phenotype-id": metadata.get("phenotype-id")
+ }
+ if res == metadata:
+ return False
+
+ cursor.execute(
+ "UPDATE Phenotype SET "
+ "Pre_publication_description=%(pre-publication-description)s, "
+ "Post_publication_description=%(post-publication-description)s, "
+ "Original_description=%(original-description)s, "
+ "Units=%(units)s, "
+ "Pre_publication_abbreviation=%(pre-publication-abbreviation)s, "
+ "Post_publication_abbreviation=%(post-publication-abbreviation)s "
+ "WHERE Id=%(phenotype-id)s",
+ metadata)
+ return cursor.rowcount
+
+
+def update_phenotype_values(conn, values):
+ """Update a phenotype's data values."""
+ with conn.cursor() as cursor:
+ cursor.executemany(
+ "UPDATE PublishData SET value=%(new)s "
+ "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in values if item["new"] is not None))
+ cursor.executemany(
+ "DELETE FROM PublishData "
+ "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in values if item["new"] is None))
+ return len(values)
+
+
+def update_phenotype_se(conn, serrs):
+ """Update a phenotype's standard-error values."""
+ with conn.cursor() as cursor:
+ cursor.executemany(
+ "INSERT INTO PublishSE(DataId, StrainId, error) "
+ "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
+ "ON DUPLICATE KEY UPDATE error=VALUES(error)",
+ tuple(item for item in serrs if item["new"] is not None))
+ cursor.executemany(
+ "DELETE FROM PublishSE "
+ "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in serrs if item["new"] is None))
+ return len(serrs)
+
+
+def update_phenotype_n(conn, counts):
+ """Update a phenotype's strain counts."""
+ with conn.cursor() as cursor:
+ cursor.executemany(
+ "INSERT INTO NStrain(DataId, StrainId, count) "
+ "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
+ "ON DUPLICATE KEY UPDATE count=VALUES(count)",
+ tuple(item for item in counts if item["new"] is not None))
+ cursor.executemany(
+ "DELETE FROM NStrain "
+ "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in counts if item["new"] is None))
+ return len(counts)
+
+
+def update_phenotype_data(conn, data: dict):
+ """Update the numeric data for a phenotype."""
+ def __organise_by_dataid_and_strainid__(acc, current):
+ _key, dataid, strainid = current[0].split("::")
+ _keysrc, _keytype = _key.split("-")
+ newkey = f"{dataid}::{strainid}"
+ newitem = acc.get(newkey, {})
+ newitem[_keysrc] = newitem.get(_keysrc, {})
+ newitem[_keysrc][_keytype] = current[1]
+ return {**acc, newkey: newitem}
+
+ def __separate_items__(acc, row):
+ key, val = row
+ return ({
+ **acc[0],
+ key: {
+ **val["value"],
+ "changed?": (not val["value"]["new"] == val["value"]["original"])
+ }
+ }, {
+ **acc[1],
+ key: {
+ **val["se"],
+ "changed?": (not val["se"]["new"] == val["se"]["original"])
+ }
+ },{
+ **acc[2],
+ key: {
+ **val["n"],
+ "changed?": (not val["n"]["new"] == val["n"]["original"])
+ }
+ })
+
+ values, serrs, counts = tuple(
+ tuple({
+ "data_id": row[0].split("::")[0],
+ "strain_id": row[0].split("::")[1],
+ "new": row[1]["new"]
+ } for row in item)
+ for item in (
+ filter(lambda val: val[1]["changed?"], item.items())# type: ignore[arg-type]
+ for item in reduce(# type: ignore[var-annotated]
+ __separate_items__,
+ reduce(__organise_by_dataid_and_strainid__,
+ data.items(),
+ {}).items(),
+ ({}, {}, {}))))
+
+ return (update_phenotype_values(conn, values),
+ update_phenotype_se(conn, serrs),
+ update_phenotype_n(conn, counts))
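+
+# The 'data' mapping is assumed to carry flattened form keys of the shape
+# "<source>-<kind>::<DataId>::<StrainId>", where <source> is one of "value",
+# "se" or "n" and <kind> is "new" or "original", e.g.
+#     {"value-original::771::3": "2.4", "value-new::771::3": "2.5",
+#      "se-original::771::3": None,    "se-new::771::3": None,
+#      "n-original::771::3": None,     "n-new::771::3": None}
+# Every (DataId, StrainId) cell is expected to carry all three sources; only
+# cells whose "new" differs from "original" are written back.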
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/phenotype/<int:xref_id>/edit",
+ methods=["GET", "POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def edit_phenotype_data(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ xref_id: int,
+ **kwargs
+):
+ """Edit the data for a particular phenotype."""
+ def __render__(**kwargs):
+ processed_kwargs = {
+ **kwargs,
+ "privileges": (kwargs.get("privileges", tuple())
+ ### For demo! Do not commit this part
+ + ("group:resource:edit-resource",
+ "group:resource:delete-resource",)
+ ### END: For demo! Do not commit this part
+ )
+ }
+ return render_template(
+ "phenotypes/edit-phenotype.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ xref_id=xref_id,
+ families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
+ **processed_kwargs,
+ activelink="edit-phenotype")
+
+ with database_connection(app.config["SQL_URI"]) as conn:
+ if request.method == "GET":
+ def __fetch_phenotype__(privileges):
+ phenotype = phenotype_by_id(conn,
+ species["SpeciesId"],
+ population["Id"],
+ dataset["Id"],
+ xref_id)
+ if phenotype is None:
+ msg = ("Could not find the phenotype with cross-reference ID"
+ f" '{xref_id}' from dataset '{dataset['FullName']}' "
+ f" from the '{population['FullName']}' population of "
+ f" species '{species['FullName']}'.")
+ return Left({"privileges": privileges, "phenotype-error": msg})
+ return {"privileges": privileges, "phenotype": phenotype}
+
+ def __fetch_publication_data__(**kwargs):
+ pheno = kwargs["phenotype"]
+ return {
+ **kwargs,
+ "publication_data": phenotype_publication_data(
+ conn, pheno["Id"])
+ }
+
+ def __fail__(failure_object):
+ # process the object
+ return __render__(failure_object=failure_object)
+
+ return oauth2_post(
+ "/auth/resource/phenotypes/individual/linked-resource",
+ json={
+ "species_id": species["SpeciesId"],
+ "population_id": population["Id"],
+ "dataset_id": dataset["Id"],
+ "xref_id": xref_id
+ }
+ ).then(
+ lambda resource: tuple(
+ privilege["privilege_id"] for role in resource["roles"]
+ for privilege in role["privileges"])
+ ).then(
+ __fetch_phenotype__
+ ).then(
+ lambda args: __fetch_publication_data__(**args)
+ ).either(__fail__, lambda args: __render__(**args))
+
+ ## POST
+ _change = False
+ match request.form.get("submit", "invalid-action"):
+ case "update basic metadata":
+ _change = update_phenotype_metadata(conn, {
+ key: value.strip() if bool(value.strip()) else None
+ for key, value in request.form.items()
+ if key not in ("submit",)
+ })
+ msg = "Basic metadata was updated successfully."
+ case "update data":
+ _update = update_phenotype_data(conn, {
+ key: value.strip() if bool(value.strip()) else None
+ for key, value in request.form.items()
+ if key not in ("submit",)
+ })
+ msg = (f"{_update[0]} value rows, {_update[1]} standard-error "
+ f"rows and {_update[2]} 'N' rows were updated.")
+ _change = any(item != 0 for item in _update)
+ case "update publication":
+ flash("NOT IMPLEMENTED: Would update publication data.", "alert-success")
+ case _:
+ flash("Invalid phenotype editing action.", "alert-danger")
+
+ if _change:
+ flash(msg, "alert-success")
+ return redirect(url_for(
+ "species.populations.phenotypes.view_phenotype",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=dataset["Id"],
+ xref_id=xref_id))
+
+ flash("No change was made by the user.", "alert-info")
+ return redirect(url_for(
+ "species.populations.phenotypes.edit_phenotype_data",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=dataset["Id"],
+ xref_id=xref_id))
+
+
+def process_phenotype_data_for_download(pheno: dict) -> dict:
+ """Sanitise data for download."""
+ return {
+ "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}",
+ **{
+ key: val for key, val in pheno.items()
+ if key not in ("Id", "xref_id", "data", "Units")
+ },
+ **{
+ data_item["StrainName"]: data_item["value"]
+ for data_item in pheno.get("data", {}).values()
+ }
+ }
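+
+# For example, a phenotype fetched as
+#     {"Id": 4, "xref_id": 10004, "Units": "g",
+#      "Post_publication_description": "Body weight [g]",
+#      "data": {(771, 3): {"StrainName": "BXD1", "value": 2.5}}}
+# is flattened for the bulk-edit file into
+#     {"UniqueIdentifier": "phId:4::xrId:10004",
+#      "Post_publication_description": "Body weight [g]", "BXD1": 2.5}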
+
+
+BULK_EDIT_COMMON_FIELDNAMES = [
+ "UniqueIdentifier",
+ "Post_publication_description",
+ "Pre_publication_abbreviation",
+ "Pre_publication_description",
+ "Original_description",
+ "Post_publication_abbreviation",
+ "PubMed_ID"
+]
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/edit-download",
+ methods=["POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def edit_download_phenotype_data(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ **kwargs
+):
+    """Serve a TSV file of phenotype data for bulk editing."""
+    formdata = request.json
+ with database_connection(app.config["SQL_URI"]) as conn:
+ samples_list = [
+ sample["Name"] for sample in samples_by_species_and_population(
+ conn, species["SpeciesId"], population["Id"])]
+ data = (
+ process_phenotype_data_for_download(pheno)
+ for pheno in phenotypes_data_by_ids(conn, tuple({
+ "population_id": population["Id"],
+ "phenoid": row["phenotype_id"],
+ "xref_id": row["xref_id"]
+ } for row in formdata)))
+
+ with (tempfile.TemporaryDirectory(
+ prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir):
+ filename = Path(tmpdir).joinpath("tempfile.tsv")
+ with open(filename, mode="w") as outfile:
+ outfile.write(
+ "# **DO NOT** delete the 'UniqueIdentifier' row. It is used "
+ "by the system to identify and edit the correct rows and "
+ "columns in the database.\n")
+ outfile.write(
+ "# The '…_description' fields are useful for you to figure out "
+ "what row you are working on. Changing any of this fields will "
+ "also update the database, so do be careful.\n")
+ outfile.write(
+ "# Leave a field empty to delete the value in the database.\n")
+ outfile.write(
+ "# Any line beginning with a '#' character is considered a "
+ "comment line. This line, and all the lines above it, are "
+ "all comment lines. Comment lines will be ignored.\n")
+ writer = csv.DictWriter(outfile,
+ fieldnames= (
+ BULK_EDIT_COMMON_FIELDNAMES +
+ samples_list),
+ dialect="excel-tab")
+ writer.writeheader()
+ writer.writerows(data)
+ outfile.flush()
+
+ return send_file(
+ filename,
+ mimetype="text/csv",
+ as_attachment=True,
+ download_name=secure_filename(f"{dataset['Name']}_data"))
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/edit-upload",
+ methods=["GET", "POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ **kwargs
+):
+ if request.method == "GET":
+ return render_template(
+ "phenotypes/bulk-edit-upload.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ activelink="edit-phenotype")
+
+ edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
+ Path(app.config["UPLOAD_FOLDER"]))
+
+ jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+ with sqlite3.connection(jobs_db) as conn:
+ job_id = uuid.uuid4()
+ job_cmd = [
+ sys.executable, "-u",
+ "-m", "scripts.phenotypes_bulk_edit",
+ app.config["SQL_URI"],
+ jobs_db,
+ str(job_id),
+ "--log-level",
+ logging.getLevelName(
+ app.logger.getEffectiveLevel()
+ ).lower()
+ ]
+ app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
+ _job = gnlibs_jobs.launch_job(
+ gnlibs_jobs.initialise_job(conn,
+ job_id,
+ job_cmd,
+ "phenotype-bulk-edit",
+ extra_meta = {
+ "edit-file": str(edit_file),
+ "species-id": species["SpeciesId"],
+ "population-id": population["Id"],
+ "dataset-id": dataset["Id"]
+ }),
+ jobs_db,
+ f"{app.config['UPLOAD_FOLDER']}/job_errors",
+ worker_manager="gn_libs.jobs.launcher")
+
+ return redirect(url_for("background-jobs.job_status",
+ job_id=job_id,
+ job_type="phenotype-bulk-edit"))
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/load-data-success/<uuid:job_id>",
+ methods=["GET"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def load_data_success(
+ species: dict,
+ population: dict,
+ dataset: dict,
+ job_id: uuid.UUID,
+ **kwargs
+):# pylint: disable=[unused-argument]
+    """Show a summary page once phenotypes data has loaded successfully."""
+    with (database_connection(app.config["SQL_URI"]) as conn,
+ sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"])
+ as jobsconn):
+ try:
+ gn2_uri = urlparse(app.config["GN2_SERVER_URL"])
+ job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True)
+ app.logger.debug("THE JOB: %s", job)
+            # materialised as a tuple, since len(...) is called on it below
+            _xref_ids = tuple(
+                str(item) for item
+                in json.loads(job["metadata"].get("xref_ids", "[]")))
+ _publication = fetch_publication_by_id(
+ conn, int(job["metadata"].get("publication_id", "0")))
+ _search_terms = (item for item in
+ (str(_publication["PubMed_ID"] or ""),
+ _publication["Authors"],
+ (_publication["Title"] or ""))
+ if item != "")
+ return render_template("phenotypes/load-phenotypes-success.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ job=job,
+ search_page_uri=urlunparse(ParseResult(
+ scheme=gn2_uri.scheme,
+ netloc=gn2_uri.netloc,
+ path="/search",
+ params="",
+ query=urlencode({
+ "species": species["Name"],
+ "group": population["Name"],
+ "type": "Phenotypes",
+ "dataset": dataset["Name"],
+ "search_terms_or": (
+ # Very long URLs will cause
+ # errors.
+ " ".join(_xref_ids)
+ if len(_xref_ids) <= 100
+ else ""),
+ "search_terms_and": " ".join(
+ _search_terms).strip(),
+ "accession_id": "None",
+ "FormID": "searchResult"
+ }),
+ fragment="")))
+        except JobNotFound as _jnf:
+ return render_template("jobs/job-not-found.html", job_id=job_id)