"""Views handling ('classical') phenotypes."""# pylint: disable=[too-many-lines]
import sys
import uuid
import json
import logging
from typing import Any
from pathlib import Path
from zipfile import ZipFile
from functools import wraps, reduce
from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode
import datetime
from redis import Redis
from pymonad.either import Left
from requests.models import Response
from MySQLdb.cursors import DictCursor
from gn_libs import sqlite3
from gn_libs import jobs as gnlibs_jobs
from gn_libs.jobs.jobs import JobNotFound
from gn_libs.mysqldb import database_connection
from flask import (flash,
request,
jsonify,
redirect,
Blueprint,
current_app as app)
from r_qtl import r_qtl2_qc as rqc
from r_qtl import exceptions as rqe
from uploader import jobs
from uploader import session
from uploader.files import save_file
from uploader.configutils import uploads_dir
from uploader.flask_extensions import url_for
from uploader.ui import make_template_renderer
from uploader.oauth2.client import oauth2_post
from uploader.oauth2.tokens import request_token
from uploader.authorisation import require_login
from uploader.oauth2 import client as oauth2client
from uploader.route_utils import build_next_argument
from uploader.route_utils import generic_select_population
from uploader.datautils import safe_int, enumerate_sequence
from uploader.species.models import all_species, species_by_id
from uploader.monadic_requests import make_either_error_handler
from uploader.publications.models import fetch_publication_by_id
from uploader.request_checks import with_species, with_population
from uploader.input_validation import (encode_errors,
decode_errors,
is_valid_representative_name)
from .models import (dataset_by_id,
phenotype_by_id,
phenotypes_count,
save_new_dataset,
dataset_phenotypes,
datasets_by_population,
phenotype_publication_data)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Flask blueprint on which every view in this module is registered.
phenotypesbp = Blueprint("phenotypes", __name__)
# Renderer returned by `make_template_renderer("phenotypes")`; the views below
# call it (instead of Flask's own `render_template`) to render their pages.
render_template = make_template_renderer("phenotypes")
# Handed to the add/edit phenotype templates as `families_with_se_and_n`.
# NOTE(review): presumably the population families whose phenotypes carry
# standard-error (SE) and sample-count (N) values -- confirm in the templates.
_FAMILIES_WITH_SE_AND_N_ = (
"Reference Populations (replicate average, SE, N)",)
@phenotypesbp.route("/phenotypes", methods=["GET"])
@require_login
def index():
    """Landing view for phenotypes: choose a species, then pick a population.

    Without a `species_id` query argument the species-selection page is
    rendered. The special value "CREATE-SPECIES" redirects to the species
    creation page. Any other value is looked up in the database: when found,
    the user is redirected to the population-selection view for that species,
    otherwise an error is flashed and the user is sent back here.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        requested = request.args.get("species_id")
        if not requested:
            return render_template("phenotypes/index.html",
                                   species=all_species(conn),
                                   activelink="phenotypes")

        if requested == "CREATE-SPECIES":
            create_species_uri = url_for(
                "species.create_species",
                return_to="species.populations.phenotypes.select_population")
            return redirect(create_species_uri)

        found = species_by_id(conn, requested)
        if found:
            return redirect(url_for(
                "species.populations.phenotypes.select_population",
                species_id=found["SpeciesId"]))

        flash("No such species!", "alert-danger")
        return redirect(url_for("species.populations.phenotypes.index"))
@phenotypesbp.route("<int:species_id>/phenotypes/select-population",
                    methods=["GET"])
@require_login
@with_species(redirect_uri="species.populations.phenotypes.index")
def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument]
    """Let the user choose the population whose phenotypes they will work on.

    All the work is delegated to `generic_select_population`; this view only
    supplies the template, the endpoints and the messages specific to
    phenotypes.
    """
    # An absent `population_id` is passed on as an empty string.
    chosen_population = request.args.get("population_id") or ""
    this_endpoint = "species.populations.phenotypes.select_population"
    next_endpoint = "species.populations.phenotypes.list_datasets"
    return generic_select_population(
        species,
        "phenotypes/select-population.html",
        chosen_population,
        this_endpoint,
        next_endpoint,
        "phenotypes",
        "No such population found!")
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets",
    methods=["GET"])
@require_login
@with_population(species_redirect_uri="species.populations.phenotypes.index",
                 redirect_uri="species.populations.phenotypes.select_population")
def list_datasets(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
    """Show the phenotype datasets that exist for the given population.

    When the population has exactly one dataset the listing is skipped and the
    user is redirected straight to that dataset's page.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        datasets = datasets_by_population(
            conn, species["SpeciesId"], population["Id"])

    if len(datasets) != 1:
        return render_template("phenotypes/list-datasets.html",
                               species=species,
                               population=population,
                               datasets=datasets,
                               activelink="list-datasets")

    only_dataset = datasets[0]
    return redirect(url_for(
        "species.populations.phenotypes.view_dataset",
        species_id=species["SpeciesId"],
        population_id=population["Id"],
        dataset_id=only_dataset["Id"]))
def with_dataset(
        species_redirect_uri: str,
        population_redirect_uri: str,
        redirect_uri: str
):
    """Build a view decorator that ensures the requested dataset exists.

    The decorated view is first wrapped with `with_population` (using
    `species_redirect_uri` and `population_redirect_uri`). The `dataset_id`
    keyword argument is then converted to an integer and looked up with
    `dataset_by_id`. On success the view is called with an extra `dataset`
    keyword argument. On any failure an error is flashed and the user is
    redirected to the `redirect_uri` endpoint for the species/population.
    """
    def __decorator__(func):
        @wraps(func)
        @with_population(species_redirect_uri, population_redirect_uri)
        def __with_dataset__(**kwargs):
            def __select_dataset__(spcid, popid):
                # Built lazily so that it is available on every failure path,
                # including failures that happen while parsing the IDs.
                return redirect(url_for(
                    redirect_uri, species_id=spcid, population_id=popid))

            try:
                _spcid = int(kwargs["species_id"])
                _popid = int(kwargs["population_id"])
                _dsetid = int(kwargs.get("dataset_id"))
            except (TypeError, ValueError):
                # TypeError covers a missing (None) 'dataset_id'.
                app.logger.debug(
                    "Exception converting 'dataset_id' to integer: %s",
                    kwargs.get("dataset_id"),
                    exc_info=True)
                flash("Expected 'dataset_id' value to be an integer.",
                      "alert-danger")
                return __select_dataset__(
                    kwargs["species_id"], kwargs["population_id"])

            if not bool(_dsetid):
                flash("You need to select a valid 'dataset_id' value.",
                      "alert-danger")
                return __select_dataset__(_spcid, _popid)

            with database_connection(app.config["SQL_URI"]) as conn:
                dataset = dataset_by_id(conn, _spcid, _popid, _dsetid)

            if not bool(dataset):
                flash("You must select a valid dataset.", "alert-danger")
                return __select_dataset__(_spcid, _popid)

            return func(dataset=dataset, **kwargs)
        return __with_dataset__
    return __decorator__
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/view",
    methods=["GET"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def view_dataset(# pylint: disable=[unused-argument]
        species: dict, population: dict, dataset: dict, **kwargs):
    """View a specific dataset together with its phenotypes.

    Query arguments:
      * `start_at`: passed to the template as `start_from`; clamped to >= 0.
      * `count`: passed to the template as `count`; defaults to 20 and falls
        back to 20 when the supplied value is not an integer.
    """
    default_count = 20
    with database_connection(app.config["SQL_URI"]) as conn:
        dataset = dataset_by_id(
            conn, species["SpeciesId"], population["Id"], dataset["Id"])
        if not bool(dataset):
            flash("Could not find such a phenotype dataset!", "alert-danger")
            return redirect(url_for(
                "species.populations.phenotypes.list_datasets",
                species_id=species["SpeciesId"],
                population_id=population["Id"]))

        start_at = max(safe_int(request.args.get("start_at") or 0), 0)
        try:
            count = int(request.args.get("count") or default_count)
        except ValueError:
            # `count` is user-supplied; junk must not turn into a HTTP 500.
            count = default_count

        return render_template("phenotypes/view-dataset.html",
                               species=species,
                               population=population,
                               dataset=dataset,
                               phenotype_count=phenotypes_count(
                                   conn, population["Id"], dataset["Id"]),
                               phenotypes=enumerate_sequence(
                                   dataset_phenotypes(
                                       conn,
                                       population["Id"],
                                       dataset["Id"])),
                               start_from=start_at,
                               count=count,
                               activelink="view-dataset")
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/phenotype/<xref_id>",
    methods=["GET"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def view_phenotype(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        xref_id: int,
        **kwargs
):
    """View an individual phenotype from the dataset.

    The user's privileges on the phenotype are requested from the
    authorisation server first. A "No linked resource!" response is treated as
    "no privileges" and the page is still rendered; any other failure is
    passed to the generic error handler. If the phenotype itself cannot be
    found, an error is flashed and the user is returned to the dataset page.
    """
    def __render__(privileges):
        phenotype = phenotype_by_id(conn,
                                    species["SpeciesId"],
                                    population["Id"],
                                    dataset["Id"],
                                    xref_id)
        if phenotype is None:
            # Without this guard the `phenotype["data"]` lookup below would
            # raise a TypeError for an unknown `xref_id`.
            flash("Could not find the phenotype with cross-reference ID "
                  f"'{xref_id}' in this dataset.",
                  "alert-danger")
            return redirect(url_for(
                "species.populations.phenotypes.view_dataset",
                species_id=species["SpeciesId"],
                population_id=population["Id"],
                dataset_id=dataset["Id"]))

        return render_template(
            "phenotypes/view-phenotype.html",
            species=species,
            population=population,
            dataset=dataset,
            xref_id=xref_id,
            phenotype=phenotype,
            has_se=any(bool(item.get("error")) for item in phenotype["data"]),
            publication=(phenotype_publication_data(conn, phenotype["Id"]) or {}),
            privileges=privileges,
            next=build_next_argument(
                uri="species.populations.phenotypes.view_phenotype",
                species_id=species["SpeciesId"],
                population_id=population["Id"],
                dataset_id=dataset["Id"],
                xref_id=xref_id),
            activelink="view-phenotype")

    def __fail__(error):
        if isinstance(error, Response) and error.json() == "No linked resource!":
            return __render__(tuple())
        return make_either_error_handler(
            "There was an error fetching the roles and privileges.")(error)

    with database_connection(app.config["SQL_URI"]) as conn:
        return oauth2_post(
            "/auth/resource/phenotypes/individual/linked-resource",
            json={
                "species_id": species["SpeciesId"],
                "population_id": population["Id"],
                "dataset_id": dataset["Id"],
                "xref_id": xref_id
            }
        ).then(
            # Flatten role -> privileges into one tuple of privilege IDs.
            lambda resource: tuple(
                privilege["privilege_id"] for role in resource["roles"]
                for privilege in role["privileges"])
        ).then(__render__).either(__fail__, lambda resp: resp)
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets/create",
    methods=["GET", "POST"])
@require_login
@with_population(
    species_redirect_uri="species.populations.phenotypes.index",
    redirect_uri="species.populations.phenotypes.select_population")
def create_dataset(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
    """Create a new phenotype dataset.

    GET renders the creation form, decoding any validation errors carried in
    the `error_values` query argument. POST validates the submitted form: on
    validation errors it redirects back to the form with the errors encoded
    in the URI, otherwise it saves the dataset and redirects to the datasets
    list. The short name is optional and defaults to the dataset name.
    """
    if request.method == "GET":
        # Rendering the form needs no database access.
        return render_template("phenotypes/create-dataset.html",
                               activelink="create-dataset",
                               species=species,
                               population=population,
                               **decode_errors(
                                   request.args.get("error_values", "")))

    form = request.form
    _name = (form.get("dataset-name") or "").strip()
    _fullname = (form.get("dataset-fullname") or "").strip()
    _shortname = (form.get("dataset-shortname") or "").strip() or _name

    _errors: tuple[tuple[str, str], ...] = tuple()
    if not is_valid_representative_name(_name):
        _errors = _errors + (("dataset-name", "Invalid dataset name."),)

    if not _fullname:
        _errors = _errors + (("dataset-fullname",
                              "You must provide a value for 'Full Name'."),)

    if _errors:
        return redirect(url_for(
            "species.populations.phenotypes.create_dataset",
            species_id=species["SpeciesId"],
            population_id=population["Id"],
            error_values=encode_errors(_errors, form)))

    with (database_connection(app.config["SQL_URI"]) as conn,
          conn.cursor(cursorclass=DictCursor) as cursor):
        save_new_dataset(
            cursor, population["Id"], _name, _fullname, _shortname)

    return redirect(url_for("species.populations.phenotypes.list_datasets",
                            species_id=species["SpeciesId"],
                            population_id=population["Id"]))
def process_phenotypes_rqtl2_bundle(error_uri):
    """Save and validate the uploaded R/qtl2 bundle.

    Returns the path of the saved bundle when it is a valid R/qtl2 bundle.
    When saving fails with an `AssertionError`, or validation raises an
    `RQTLError`, an error message is flashed and `error_uri` is returned
    instead, so callers must check what they got back.
    """
    try:
        ## Handle huge files here...
        phenobundle = save_file(request.files["phenotypes-bundle"],
                                uploads_dir(app))
        rqc.validate_bundle(phenobundle)
        return phenobundle
    except AssertionError:
        app.logger.debug("File upload error!", exc_info=True)
        flash("Expected a zipped bundle of files with phenotypes' "
              "information.",
              "alert-danger")
        return error_uri
    except rqe.RQTLError as rqtlerr:
        app.logger.debug("Bundle validation error!", exc_info=True)
        # Exception args are not guaranteed to be strings.
        flash("R/qtl2 Error: " + " ".join(str(arg) for arg in rqtlerr.args),
              "alert-danger")
        return error_uri
def process_phenotypes_individual_files(error_uri):
    """Pack the individually uploaded files into an R/qtl2-style zip bundle.

    Each uploaded file is added to a new zip archive in the uploads directory
    together with a generated `control_data.json`. Two upload modes are
    handled:

      * resumable (chunked) uploads: the form field holds a JSON object with
        the `uploaded-file` and `original-name` of a file already on disk;
      * plain uploads: the file itself is sent in `request.files`.

    Returns the path to the bundle. When a mandatory file is missing, an error
    is flashed, the partial bundle is removed and `error_uri` is returned
    instead, so callers must check what they got back.
    """
    form = request.form
    resumable = bool(form.get("resumable-upload", False))
    cdata: dict[str, Any] = {
        "sep": form["file-separator"],
        "comment.char": form["file-comment-character"],
        "na.strings": form["file-na"].split(" "),
    }
    bundlepath = Path(uploads_dir(app),
                      f"{str(uuid.uuid4()).replace('-', '')}.zip")
    missing_file = None
    with ZipFile(bundlepath, mode="w") as zfile:
        for rqtlkey, formkey, _type in (
                ("phenocovar", "phenotype-descriptions", "mandatory"),
                ("pheno", "phenotype-data", "mandatory"),
                ("phenose", "phenotype-se", "optional"),
                ("phenonum", "phenotype-n", "optional")):
            # Where the upload lives depends on the upload mode: looking only
            # in `form` would silently drop optional files on plain uploads.
            provided = (form.get(formkey)
                        if resumable else
                        request.files.get(formkey))
            if not bool(provided):
                if _type == "optional":
                    continue # skip if an optional key does not exist.
                missing_file = formkey
                break

            cdata[f"{rqtlkey}_transposed"] = (
                (form.get(f"{formkey}-transposed") or "off") == "on")

            if resumable:
                # Chunked upload of large files was used
                filedata = json.loads(provided)
                zfile.write(
                    Path(uploads_dir(app), filedata["uploaded-file"]),
                    arcname=filedata["original-name"])
                cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]]
            else:
                filepath = save_file(
                    provided, uploads_dir(app), hashed=False)
                zfile.write(
                    Path(uploads_dir(app), filepath),
                    arcname=filepath.name)
                cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name]

        if missing_file is None:
            zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2))

    if missing_file is not None:
        # Do not leave a half-built bundle behind in the uploads directory.
        bundlepath.unlink(missing_ok=True)
        flash(f"Expected file ('{missing_file}') was not provided.",
              "alert-danger")
        return error_uri

    return bundlepath
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/add-phenotypes",
    methods=["GET", "POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# pylint: disable=[unused-argument, too-many-locals]
    """Add one or more phenotypes to the dataset.

    GET renders the upload form (R/qtl2 bundle or raw files, depending on the
    `use_bundle` query argument). POST stores the uploaded files as a bundle,
    launches the quality-control job on it and sends the user to the job's
    status page: as JSON for resumable uploads, as a redirect otherwise.
    """
    use_bundle = request.args.get("use_bundle", "").lower() == "true"
    if request.method == "GET":
        # The form needs no Redis connection.
        today = datetime.date.today()
        return render_template(
            ("phenotypes/add-phenotypes-with-rqtl2-bundle.html"
             if use_bundle
             else "phenotypes/add-phenotypes-raw-files.html"),
            species=species,
            population=population,
            dataset=dataset,
            monthnames=(
                "January", "February", "March", "April",
                "May", "June", "July", "August",
                "September", "October", "November",
                "December"),
            current_month=today.strftime("%B"),
            current_year=int(today.strftime("%Y")),
            families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
            use_bundle=use_bundle,
            activelink="add-phenotypes")

    add_phenos_uri = redirect(url_for(
        "species.populations.phenotypes.add_phenotypes",
        species_id=species["SpeciesId"],
        population_id=population["Id"],
        dataset_id=dataset["Id"]))
    _redisuri = app.config["REDIS_URL"]
    _sqluri = app.config["SQL_URI"]

    phenobundle = (process_phenotypes_rqtl2_bundle(add_phenos_uri)
                   if use_bundle else
                   process_phenotypes_individual_files(add_phenos_uri))
    if not isinstance(phenobundle, Path):
        # The processors hand back the error redirect (not a path) when the
        # upload was rejected: return it rather than launching a job on it.
        return phenobundle

    with Redis.from_url(_redisuri, decode_responses=True) as rconn:
        _jobid = uuid.uuid4()
        _namespace = jobs.jobsnamespace()
        _ttl_seconds = app.config["JOBS_TTL_SECONDS"]
        _job = jobs.launch_job(
            jobs.initialise_job(
                rconn,
                _namespace,
                str(_jobid),
                [sys.executable, "-m", "scripts.rqtl2.phenotypes_qc", _sqluri,
                 _redisuri, _namespace, str(_jobid), str(species["SpeciesId"]),
                 str(population["Id"]),
                 str(phenobundle),
                 "--loglevel",
                 logging.getLevelName(
                     app.logger.getEffectiveLevel()
                 ).lower(),
                 "--redisexpiry",
                 str(_ttl_seconds)], "phenotype_qc", _ttl_seconds,
                {"job-metadata": json.dumps({
                    "speciesid": species["SpeciesId"],
                    "populationid": population["Id"],
                    "datasetid": dataset["Id"],
                    "bundle": str(phenobundle.absolute()),
                    # Only recorded when a publication was selected.
                    **({"publicationid": request.form["publication-id"]}
                       if request.form.get("publication-id") else {})})}),
            _redisuri,
            f"{uploads_dir(app)}/job_errors")

    app.logger.debug("JOB DETAILS: %s", _job)
    jobstatusuri = url_for("species.populations.phenotypes.job_status",
                           species_id=species["SpeciesId"],
                           population_id=population["Id"],
                           dataset_id=dataset["Id"],
                           job_id=str(_job["jobid"]))
    return ((jsonify({
        "redirect-to": jobstatusuri,
        "statuscode": 200,
        "message": ("Follow the 'redirect-to' URI to see the state "
                    "of the quality-control job started for your "
                    "uploaded files.")
    }), 200)
            if request.form.get("resumable-upload", False) else
            redirect(jobstatusuri))
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/job/<uuid:job_id>",
    methods=["GET"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def job_status(
        species: dict,
        population: dict,
        dataset: dict,
        job_id: uuid.UUID,
        **kwargs
):# pylint: disable=[unused-argument]
    """Retrieve current status of a particular phenotype QC job.

    When the job cannot be found the page is still rendered, with `job=None`
    and empty `errors` and `metadata`.
    """
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        try:
            job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
        except jobs.JobNotFound:
            job = None

        # Errors and file metadata are keyed on the job's own ID, so they can
        # only be fetched when the job was found.
        if job:
            errors = jobs.job_errors(
                rconn, jobs.jobsnamespace(), job['jobid'])
            metadata = jobs.job_files_metadata(
                rconn, jobs.jobsnamespace(), job['jobid'])
        else:
            errors = tuple()
            metadata = {}

        return render_template("phenotypes/job-status.html",
                               species=species,
                               population=population,
                               dataset=dataset,
                               job_id=job_id,
                               job=job,
                               errors=errors,
                               metadata=metadata,
                               activelink="add-phenotypes")
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/job/<uuid:job_id>/review",
    methods=["GET"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def review_job_data(
        species: dict,
        population: dict,
        dataset: dict,
        job_id: uuid.UUID,
        **kwargs
):# pylint: disable=[unused-argument]
    """Review data one more time before entering it into the database.

    Builds a per-filetype summary (number of files, total data rows, a short
    description) from the QC job's file metadata, and looks up the publication
    recorded in the job's metadata, if any. When the job cannot be found the
    page is rendered with `job=None`, an empty summary and no publication.
    """
    _descriptions = {
        "phenocovar": "phenotypes",
        "pheno": "phenotypes data",
        "phenose": "phenotypes standard-errors",
        "phenonum": "phenotypes samples"
    }

    def __metadata_by_type__(by_type, item):
        # item is a (filename, file-metadata) pair; group the files by type.
        filetype = item[1]["filetype"]
        return {
            **by_type,
            filetype: (by_type.get(filetype, tuple())
                       + ({"filename": item[0], **item[1]},))
        }

    def __summarise__(filetype, files):
        return {
            "filetype": filetype,
            "number-of-files": len(files),
            # One line is subtracted per file from its line count.
            # NOTE(review): presumably the header line -- confirm.
            "total-data-rows": sum(
                int(afile["linecount"]) - 1 for afile in files),
            "description": _descriptions.get(
                filetype, f"unknown file type '{filetype}'.")
        }

    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
          database_connection(app.config["SQL_URI"]) as conn):
        try:
            job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
        except jobs.JobNotFound:
            job = None

        metadata: dict[str, Any] = reduce(
            __metadata_by_type__,
            (jobs.job_files_metadata(
                rconn, jobs.jobsnamespace(), job['jobid'])
             if job else {}).items(),
            {})
        summary = {
            filetype: __summarise__(filetype, meta)
            for filetype, meta in metadata.items()
        }
        # A missing job has no metadata to decode.
        _job_metadata = json.loads(job["job-metadata"]) if job else {}
        return render_template("phenotypes/review-job-data.html",
                               species=species,
                               population=population,
                               dataset=dataset,
                               job_id=job_id,
                               job=job,
                               summary=summary,
                               publication=(
                                   fetch_publication_by_id(
                                       conn, int(_job_metadata["publicationid"]))
                                   if _job_metadata.get("publicationid")
                                   else None),
                               activelink="add-phenotypes")
def load_phenotypes_success_handler(job):
    """Redirect to the "load data success" page for a finished load job.

    The species, population and dataset IDs are read from the job's metadata.
    """
    meta = job["metadata"]
    success_uri = url_for(
        "species.populations.phenotypes.load_data_success",
        species_id=meta["species_id"],
        population_id=meta["population_id"],
        dataset_id=meta["dataset_id"],
        job_id=job["job_id"])
    return redirect(success_uri)
def proceed_to_job_status(job):
    """Log the newly created job and redirect to its status page."""
    app.logger.debug("The new job: %s", job)
    status_uri = url_for("background-jobs.job_status", job_id=job["job_id"])
    return redirect(status_uri)
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/load-data-to-database",
    methods=["POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def load_data_to_database(
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):# pylint: disable=[unused-argument]
    """Load the data from the given QC job into the database.

    The QC job named by the `data-qc-job-id` form field supplies the bundle
    path and, when one was chosen, the publication ID. A token is requested
    from the authorisation server, then a background job running
    `scripts.load_phenotypes_to_db` is created and launched, and the user is
    redirected to that job's status page. A failure anywhere in that chain
    renders the "http-error.html" page.
    """
    _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
          sqlite3.connection(_jobs_db) as conn):
        # T0DO: Maybe break the connection between the jobs here, pass:
        # - the bundle name (rebuild the full path here.)
        # - publication details, where separate
        # - details about the files: e.g. total lines, etc
        qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"])
        _meta = json.loads(qc_job["job-metadata"])
        _load_job_id = uuid.uuid4()
        _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()
        command = [
            sys.executable,
            "-u",
            "-m",
            "scripts.load_phenotypes_to_db",
            app.config["SQL_URI"],
            _jobs_db,
            str(_load_job_id),
            "--log-level",
            _loglevel
        ]

        def __handle_error__(resp):
            # The JSON payload is template context: pass it as keyword
            # arguments, not as positional ones.
            return render_template("http-error.html", **resp.json())

        return request_token(
            token_uri=urljoin(oauth2client.authserver_uri(), "auth/token"),
            user_id=session.user_details()["user_id"]
        ).then(
            lambda token: gnlibs_jobs.initialise_job(
                conn,
                _load_job_id,
                command,
                "load-new-phenotypes-data",
                extra_meta={
                    "species_id": species["SpeciesId"],
                    "population_id": population["Id"],
                    "dataset_id": dataset["Id"],
                    "bundle_file": _meta["bundle"],
                    # The QC job only records a publication when one was
                    # selected, so the key may legitimately be absent.
                    **({"publication_id": _meta["publicationid"]}
                       if _meta.get("publicationid") else {}),
                    "authserver": oauth2client.authserver_uri(),
                    "token": token["access_token"],
                    "success_handler": (
                        "uploader.phenotypes.views"
                        ".load_phenotypes_success_handler")
                },
                external_id=session.logged_in_user_id())
        ).then(
            lambda job: gnlibs_jobs.launch_job(
                job,
                _jobs_db,
                Path(f"{uploads_dir(app)}/job_errors"),
                worker_manager="gn_libs.jobs.launcher",
                loglevel=_loglevel)
        ).either(__handle_error__, proceed_to_job_status)
def update_phenotype_metadata(conn, metadata: dict):
    """Update a phenotype's basic metadata values.

    The stored row is fetched and its column names are normalised (lowercase,
    "_" replaced by "-") so they can be compared with the keys of `metadata`.
    Returns `False` when the stored values already equal `metadata`;
    otherwise runs the UPDATE and returns the cursor's `rowcount`.
    """
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM Phenotype WHERE Id=%(phenotype-id)s",
                       metadata)
        stored_row = cursor.fetchone() or {}

        # Keep only the stored columns that the caller also submitted.
        current = {}
        for column, value in stored_row.items():
            field = column.lower().replace("_", "-")
            if field in metadata:
                current[field] = value
        current["phenotype-id"] = metadata.get("phenotype-id")

        if current == metadata:
            return False

        update_query = (
            "UPDATE Phenotype SET "
            "Pre_publication_description=%(pre-publication-description)s, "
            "Post_publication_description=%(post-publication-description)s, "
            "Original_description=%(original-description)s, "
            "Units=%(units)s, "
            "Pre_publication_abbreviation=%(pre-publication-abbreviation)s, "
            "Post_publication_abbreviation=%(post-publication-abbreviation)s "
            "WHERE Id=%(phenotype-id)s")
        cursor.execute(update_query, metadata)
        return cursor.rowcount
def update_phenotype_values(conn, values):
    """Update a phenotype's data values.

    `values` is an iterable of dicts with the keys `data_id`, `strain_id` and
    `new`. Rows whose `new` value is not None are UPDATEd; rows whose `new`
    value is None are DELETEd. Returns the number of rows processed.
    """
    # Materialise once: the rows are traversed twice and counted, which would
    # not work if a generator were passed in.
    rows = tuple(values)
    with conn.cursor() as cursor:
        cursor.executemany(
            "UPDATE PublishData SET value=%(new)s "
            "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
            tuple(item for item in rows if item["new"] is not None))
        cursor.executemany(
            "DELETE FROM PublishData "
            "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
            tuple(item for item in rows if item["new"] is None))
    return len(rows)
def update_phenotype_se(conn, serrs):
    """Update a phenotype's standard-error values.

    `serrs` is an iterable of dicts with the keys `data_id`, `strain_id` and
    `new`. Rows whose `new` value is not None are inserted (or updated when
    the key already exists); rows whose `new` value is None are deleted.
    Returns the number of rows processed.
    """
    # Materialise once: the rows are traversed twice and counted, which would
    # not work if a generator were passed in.
    rows = tuple(serrs)
    with conn.cursor() as cursor:
        cursor.executemany(
            "INSERT INTO PublishSE(DataId, StrainId, error) "
            "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
            "ON DUPLICATE KEY UPDATE error=VALUES(error)",
            tuple(item for item in rows if item["new"] is not None))
        cursor.executemany(
            "DELETE FROM PublishSE "
            "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
            tuple(item for item in rows if item["new"] is None))
    return len(rows)
def update_phenotype_n(conn, counts):
    """Update a phenotype's strain counts.

    `counts` is an iterable of dicts with the keys `data_id`, `strain_id` and
    `new`. Rows whose `new` value is not None are inserted (or updated when
    the key already exists); rows whose `new` value is None are deleted.
    Returns the number of rows processed.
    """
    # Materialise once: the rows are traversed twice and counted, which would
    # not work if a generator were passed in.
    rows = tuple(counts)
    with conn.cursor() as cursor:
        cursor.executemany(
            "INSERT INTO NStrain(DataId, StrainId, count) "
            "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
            "ON DUPLICATE KEY UPDATE count=VALUES(count)",
            tuple(item for item in rows if item["new"] is not None))
        cursor.executemany(
            "DELETE FROM NStrain "
            "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
            tuple(item for item in rows if item["new"] is None))
    return len(rows)
def update_phenotype_data(conn, data: dict):
    """Update the numeric data for a phenotype.

    The keys of `data` have the form "<source>-<kind>::<data id>::<strain id>"
    where the code below reads the sources "value", "se" and "n", and the
    kinds "new" and "original". The entries are grouped by data ID and strain
    ID; for each source, only the rows whose "new" entry differs from the
    "original" one are sent to the corresponding update function.

    Returns a 3-tuple with the results of `update_phenotype_values`,
    `update_phenotype_se` and `update_phenotype_n`, in that order.
    """
    # "<data id>::<strain id>" -> {source: {kind: submitted value}}
    grouped: dict = {}
    for fieldname, submitted in data.items():
        _key, dataid, strainid = fieldname.split("::")
        _keysrc, _keytype = _key.split("-")
        by_source = grouped.setdefault(f"{dataid}::{strainid}", {})
        by_source.setdefault(_keysrc, {})[_keytype] = submitted

    def __changed_rows__(source):
        changed = []
        for rowkey, by_source in grouped.items():
            entry = by_source[source]
            if entry["new"] == entry["original"]:
                continue
            dataid, strainid = rowkey.split("::")[0:2]
            changed.append({
                "data_id": dataid,
                "strain_id": strainid,
                "new": entry["new"]
            })
        return tuple(changed)

    # Work out every change before touching the database, so that malformed
    # input fails before any row is written.
    values = __changed_rows__("value")
    serrs = __changed_rows__("se")
    counts = __changed_rows__("n")

    return (update_phenotype_values(conn, values),
            update_phenotype_se(conn, serrs),
            update_phenotype_n(conn, counts))
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/phenotype/<int:xref_id>/edit",
    methods=["GET", "POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def edit_phenotype_data(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        xref_id: int,
        **kwargs
):
    """Edit the data for a particular phenotype.

    GET fetches the user's privileges, the phenotype and its publication data,
    then renders the edit page; any failure along the way is handed to the
    same template as `failure_object`. POST dispatches on the `submit` form
    value, applies the requested update, and redirects to the phenotype's view
    page when something changed, or back to the edit page otherwise.
    """
    def __render__(**kwargs):
        context = dict(kwargs)
        # Always give the template a `privileges` value.
        context["privileges"] = kwargs.get("privileges", tuple())
        return render_template(
            "phenotypes/edit-phenotype.html",
            species=species,
            population=population,
            dataset=dataset,
            xref_id=xref_id,
            families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
            **context,
            activelink="edit-phenotype")

    def __submitted_values__():
        # Every form field but the submit button; blank values become None.
        return {
            key: (value.strip() or None)
            for key, value in request.form.items()
            if key != "submit"
        }

    with database_connection(app.config["SQL_URI"]) as conn:
        if request.method == "GET":
            def __fetch_phenotype__(privileges):
                phenotype = phenotype_by_id(conn,
                                            species["SpeciesId"],
                                            population["Id"],
                                            dataset["Id"],
                                            xref_id)
                if phenotype is not None:
                    return {"privileges": privileges, "phenotype": phenotype}

                msg = ("Could not find the phenotype with cross-reference ID"
                       f" '{xref_id}' from dataset '{dataset['FullName']}' "
                       f" from the '{population['FullName']}' population of "
                       f" species '{species['FullName']}'.")
                return Left({"privileges": privileges, "phenotype-error": msg})

            def __fetch_publication_data__(**kwargs):
                publication_data = phenotype_publication_data(
                    conn, kwargs["phenotype"]["Id"])
                return {**kwargs, "publication_data": publication_data}

            def __fail__(failure_object):
                # process the object
                return __render__(failure_object=failure_object)

            def __privilege_ids__(resource):
                # Flatten role -> privileges into one tuple of privilege IDs.
                return tuple(
                    privilege["privilege_id"]
                    for role in resource["roles"]
                    for privilege in role["privileges"])

            return oauth2_post(
                "/auth/resource/phenotypes/individual/linked-resource",
                json={
                    "species_id": species["SpeciesId"],
                    "population_id": population["Id"],
                    "dataset_id": dataset["Id"],
                    "xref_id": xref_id
                }
            ).then(
                __privilege_ids__
            ).then(
                __fetch_phenotype__
            ).then(
                lambda args: __fetch_publication_data__(**args)
            ).either(__fail__, lambda args: __render__(**args))

        ## POST
        _change = False
        action = request.form.get("submit", "invalid-action")
        if action == "update basic metadata":
            _change = update_phenotype_metadata(conn, __submitted_values__())
            msg = "Basic metadata was updated successfully."
        elif action == "update data":
            _update = update_phenotype_data(conn, __submitted_values__())
            msg = (f"{_update[0]} value rows, {_update[1]} standard-error "
                   f"rows and {_update[2]} 'N' rows were updated.")
            _change = any(item != 0 for item in _update)
        elif action == "update publication":
            flash("NOT IMPLEMENTED: Would update publication data.", "alert-success")
        else:
            flash("Invalid phenotype editing action.", "alert-danger")

        if not _change:
            flash("No change was made by the user.", "alert-info")
            return redirect(url_for(
                "species.populations.phenotypes.edit_phenotype_data",
                species_id=species["SpeciesId"],
                population_id=population["Id"],
                dataset_id=dataset["Id"],
                xref_id=xref_id))

        flash(msg, "alert-success")
        return redirect(url_for(
            "species.populations.phenotypes.view_phenotype",
            species_id=species["SpeciesId"],
            population_id=population["Id"],
            dataset_id=dataset["Id"],
            xref_id=xref_id))
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/load-data-success/<uuid:job_id>",
    methods=["GET"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def load_data_success(
        species: dict,
        population: dict,
        dataset: dict,
        job_id: uuid.UUID,
        **kwargs
):# pylint: disable=[unused-argument]
    """Display success page if loading data to database was successful.

    The page links to a search on the server configured as `GN2_SERVER_URL`,
    built from the job's cross-reference IDs and the publication's details.
    A job without a (valid) publication simply contributes no publication
    search terms. When the job cannot be found, the "job not found" page is
    rendered instead.
    """
    with (database_connection(app.config["SQL_URI"]) as conn,
          sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"])
          as jobsconn):
        try:
            gn2_uri = urlparse(app.config["GN2_SERVER_URL"])
            job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True)
            app.logger.debug("THE JOB: %s", job)
            _xref_ids = tuple(
                str(item) for item
                in json.loads(job["metadata"].get("xref_ids", "[]")))

            try:
                _publication_id = int(
                    job["metadata"].get("publication_id") or 0)
            except (TypeError, ValueError):
                # The metadata value is not something we can parse: treat it
                # as "no publication".
                _publication_id = 0
            # The lookup may come back empty, and individual fields may be
            # empty too: neither should break the success page.
            _publication = fetch_publication_by_id(conn, _publication_id) or {}
            _search_terms = tuple(
                str(item) for item in
                (_publication.get("PubMed_ID"),
                 _publication.get("Authors"),
                 _publication.get("Title"))
                if item)

            return render_template(
                "phenotypes/load-phenotypes-success.html",
                species=species,
                population=population,
                dataset=dataset,
                job=job,
                search_page_uri=urlunparse(ParseResult(
                    scheme=gn2_uri.scheme,
                    netloc=gn2_uri.netloc,
                    path="/search",
                    params="",
                    query=urlencode({
                        "species": species["Name"],
                        "group": population["Name"],
                        "type": "Phenotypes",
                        "dataset": dataset["Name"],
                        "search_terms_or": (
                            # Very long URLs will cause
                            # errors.
                            " ".join(_xref_ids)
                            if len(_xref_ids) <= 100
                            else ""),
                        "search_terms_and": " ".join(
                            _search_terms).strip(),
                        "accession_id": "None",
                        "FormID": "searchResult"
                    }),
                    fragment="")))
        except JobNotFound:
            return render_template("jobs/job-not-found.html", job_id=job_id)
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/recompute-means",
    methods=["POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def recompute_means(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):
    """Launch a background job that (re)computes phenotype means.

    The job runs `scripts.compute_phenotype_means` for the population. When
    phenotypes were selected in the form (`selected-phenotypes`), their
    cross-reference IDs are passed on with `--cross-ref-ids`. The user is then
    redirected to the job's status page.
    """
    _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
    _job_id = uuid.uuid4()
    # The ID is whatever follows the last "_" in each submitted value.
    _xref_ids = tuple(
        int(selected.split("_")[-1])
        for selected in request.form.getlist("selected-phenotypes"))
    _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()

    command = [
        sys.executable,
        "-u",
        "-m",
        "scripts.compute_phenotype_means",
        app.config["SQL_URI"],
        _jobs_db,
        str(population["Id"]),
        "--log-level",
        _loglevel]
    if _xref_ids:
        command = command + [
            "--cross-ref-ids", ",".join(str(_id) for _id in _xref_ids)]
    logger.debug("%s.recompute_means: command (%s)", __name__, command)

    job_metadata = {
        "species_id": species["SpeciesId"],
        "population_id": population["Id"],
        "dataset_id": dataset["Id"],
        "success_handler": (
            "uploader.phenotypes.views."
            "recompute_phenotype_means_success_handler")
    }
    with sqlite3.connection(_jobs_db) as conn:
        new_job = gnlibs_jobs.initialise_job(
            conn,
            _job_id,
            command,
            "(re)compute-phenotype-means",
            extra_meta=job_metadata,
            external_id=session.logged_in_user_id())
        _job = gnlibs_jobs.launch_job(
            new_job,
            _jobs_db,
            Path(f"{uploads_dir(app)}/job_errors"),
            worker_manager="gn_libs.jobs.launcher",
            loglevel=_loglevel)
        return redirect(url_for("background-jobs.job_status",
                                job_id=_job["job_id"]))
def return_to_dataset_view_handler(job, msg: str):
    """Background-job handler: flash `msg` and go to the `View Dataset` page.

    The species, population and dataset to show are read from the job's
    "metadata"; the job's own ID is passed along as the `job_id` argument of
    the generated URL.
    """
    flash(msg, "alert alert-success")
    metadata = job["metadata"]
    dataset_view_uri = url_for(
        "species.populations.phenotypes.view_dataset",
        species_id=metadata["species_id"],
        population_id=metadata["population_id"],
        dataset_id=metadata["dataset_id"],
        job_id=job["job_id"])
    return redirect(dataset_view_uri)
def recompute_phenotype_means_success_handler(job):
    """Handle successful completion of a phenotype-means computation job."""
    success_message = "Means computed successfully!"
    return return_to_dataset_view_handler(job, success_message)
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/rerun-qtlreaper",
    methods=["POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def rerun_qtlreaper(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):
    """(Re)run QTLReaper for phenotypes in a particular population.

    Builds the command line for the `scripts.run_qtlreaper` module, registers
    it as a "(re)run-qtlreaper" job in the asynchronous jobs database,
    launches it, and redirects the user to the job's status page.

    The cross-reference IDs of any "selected-phenotypes" submitted with the
    form are appended to the command as trailing positional arguments.
    """
    _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
    _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()
    _workingdir = Path(app.config["SCRATCH_DIRECTORY"]).joinpath("qtlreaper")
    # `parents=True` so that a scratch directory that has not been created yet
    # does not make the request fail with `FileNotFoundError`.
    _workingdir.mkdir(parents=True, exist_ok=True)
    # Each submitted value carries the cross-reference ID as the piece after
    # its last underscore.
    _xref_ids = tuple(
        int(item.split("_")[-1])
        for item in request.form.getlist("selected-phenotypes"))
    command = [
        sys.executable,
        "-u",
        "-m",
        "scripts.run_qtlreaper",
        "--log-level", _loglevel,
        app.config["SQL_URI"],
        str(species["SpeciesId"]),
        str(population["Id"]),
        str(Path(app.config["GENOTYPE_FILES_DIRECTORY"]).joinpath(
            "genotype")),
        str(_workingdir)
    ] + [str(_xref_id) for _xref_id in _xref_ids]
    logger.debug("(Re)run QTLReaper: %s", command)
    with sqlite3.connection(_jobs_db) as conn:
        _job = gnlibs_jobs.launch_job(
            gnlibs_jobs.initialise_job(
                conn,
                uuid.uuid4(),
                command,
                "(re)run-qtlreaper",
                extra_meta={
                    "species_id": species["SpeciesId"],
                    "population_id": population["Id"],
                    "dataset_id": dataset["Id"],
                    # Dotted path of the handler to call once the job
                    # completes successfully.
                    "success_handler": (
                        "uploader.phenotypes.views."
                        "rerun_qtlreaper_success_handler")
                },
                external_id=session.logged_in_user_id()),
            _jobs_db,
            Path(f"{uploads_dir(app)}/job_errors"),
            worker_manager="gn_libs.jobs.launcher",
            loglevel=_loglevel)
        return redirect(url_for("background-jobs.job_status",
                                job_id=_job["job_id"]))
def rerun_qtlreaper_success_handler(job):
    """Handle successful completion of a QTLReaper (re)run job."""
    success_message = "QTLReaper ran successfully!"
    return return_to_dataset_view_handler(job, success_message)
def delete_phenotypes_success_handler(job):
    """Handle successful completion of the 'delete-phenotypes' job."""
    success_message = "Phenotypes deleted successfully."
    return return_to_dataset_view_handler(job, success_message)
@phenotypesbp.route(
"<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
"/<int:dataset_id>/delete",
methods=["GET", "POST"])
@require_login
@with_dataset(
species_redirect_uri="species.populations.phenotypes.index",
population_redirect_uri="species.populations.phenotypes.select_population",
redirect_uri="species.populations.phenotypes.list_datasets")
def delete_phenotypes(# pylint: disable=[unused-argument]
species: dict,
population: dict,
dataset: dict,
**kwargs
):
"""Delete the specified phenotype data."""
_dataset_page = redirect(url_for(
"species.populations.phenotypes.view_dataset",
species_id=species["SpeciesId"],
population_id=population["Id"],
dataset_id=dataset["Id"]))
def __handle_error__(resp):
flash(
"Error retrieving authorisation token. Phenotype deletion "
"failed. Please try again later.",
"alert alert-danger")
return _dataset_page
_jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
with (database_connection(app.config["SQL_URI"]) as conn,
sqlite3.connection(_jobs_db) as jobsconn):
form = request.form
xref_ids = tuple(int(item) for item in set(form.getlist("xref_ids")))
match form.get("action"):
case "cancel":
return redirect(url_for(
"species.populations.phenotypes.view_dataset",
species_id=species["SpeciesId"],
population_id=population["Id"],
dataset_id=dataset["Id"]))
case "delete":
_loglevel = logging.getLevelName(
app.logger.getEffectiveLevel()).lower()
if form.get("confirm_delete_all_phenotypes", "") == "on":
_cmd = ["--delete-all"]
else:
# setup phenotypes xref_ids file
_xref_ids_file = Path(
app.config["SCRATCH_DIRECTORY"],
f"delete-phenotypes-{uuid.uuid4()}.txt")
with _xref_ids_file.open(mode="w", encoding="utf8") as ptr:
ptr.write("\n".join(str(_id) for _id in xref_ids))
_cmd = ["--xref_ids_file", str(_xref_ids_file)]
_job_id = uuid.uuid4()
return request_token(
token_uri=urljoin(
oauth2client.authserver_uri(), "auth/token"),
user_id=session.user_details()["user_id"]
).then(
lambda token: gnlibs_jobs.initialise_job(
jobsconn,
_job_id,
[
sys.executable,
"-u",
"-m",
"scripts.phenotypes.delete_phenotypes",
"--log-level", _loglevel,
app.config["SQL_URI"],
str(species["SpeciesId"]),
str(population["Id"]),
str(dataset["Id"]),
app.config["AUTH_SERVER_URL"],
token["access_token"]] + _cmd,
"delete-phenotypes",
extra_meta={
"species_id": species["SpeciesId"],
"population_id": population["Id"],
"dataset_id": dataset["Id"],
"success_handler": (
"uploader.phenotypes.views."
"delete_phenotypes_success_handler")
},
external_id=session.logged_in_user_id())
).then(
lambda _job: gnlibs_jobs.launch_job(
_job,
_jobs_db,
Path(f"{uploads_dir(app)}/job_errors"),
worker_manager="gn_libs.jobs.launcher",
loglevel=_loglevel)
).either(__handle_error__, proceed_to_job_status)
case _:
_phenos = tuple()
if len(xref_ids) > 0:
_phenos = dataset_phenotypes(
conn, population["Id"], dataset["Id"], xref_ids=xref_ids)
return render_template(
"phenotypes/confirm-delete-phenotypes.html",
species=species,
population=population,
dataset=dataset,
phenotypes=_phenos)