aboutsummaryrefslogtreecommitdiff
path: root/uploader/phenotypes
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/phenotypes')
-rw-r--r--uploader/phenotypes/misc.py26
-rw-r--r--uploader/phenotypes/models.py112
-rw-r--r--uploader/phenotypes/views.py534
3 files changed, 599 insertions, 73 deletions
diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py
new file mode 100644
index 0000000..cbe3b7f
--- /dev/null
+++ b/uploader/phenotypes/misc.py
@@ -0,0 +1,26 @@
+"""Miscellaneous functions handling phenotypes and phenotypes data."""
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def phenotypes_data_differences(
+ filedata: tuple[dict, ...], dbdata: tuple[dict, ...]
+) -> tuple[dict, ...]:
+ """Compute differences between file data and db data"""
+ diff = tuple()
+ for filerow, dbrow in zip(
+ sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])),
+ sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))):
+ for samplename, value in filerow["data"].items():
+ if value != dbrow["data"].get(samplename, {}).get("value"):
+ diff = diff + ({
+ "PhenotypeId": filerow["phenotype_id"],
+ "xref_id": filerow["xref_id"],
+ "DataId": dbrow["DataId"],
+ "StrainId": dbrow["data"].get(samplename, {}).get("StrainId"),
+ "StrainName": samplename,
+ "value": value
+ },)
+
+ return diff
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index 73b1cce..4a229e6 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -54,6 +54,20 @@ def phenotypes_count(conn: mdb.Connection,
return int(cursor.fetchone()["total_phenos"])
+def phenotype_publication_data(conn, phenotype_id) -> Optional[dict]:
+ """Retrieve the publication data for a phenotype if it exists."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT DISTINCT pxr.PhenotypeId, pub.* FROM PublishXRef AS pxr "
+ "INNER JOIN Publication as pub ON pxr.PublicationId=pub.Id "
+ "WHERE pxr.PhenotypeId=%s",
+ (phenotype_id,))
+ res = cursor.fetchone()
+ if res is None:
+ return res
+ return dict(res)
+
+
def dataset_phenotypes(conn: mdb.Connection,
population_id: int,
dataset_id: int,
@@ -61,7 +75,7 @@ def dataset_phenotypes(conn: mdb.Connection,
limit: Optional[int] = None) -> tuple[dict, ...]:
"""Fetch the actual phenotypes."""
_query = (
- "SELECT pheno.*, pxr.Id, ist.InbredSetCode FROM Phenotype AS pheno "
+ "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, ist.InbredSetCode FROM Phenotype AS pheno "
"INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
"INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId "
"INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id "
@@ -73,31 +87,41 @@ def dataset_phenotypes(conn: mdb.Connection,
return tuple(dict(row) for row in cursor.fetchall())
-def __phenotype_se__(cursor: Cursor,
- species_id: int,
- population_id: int,
- dataset_id: int,
- xref_id: str) -> dict:
+def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids):
"""Fetch standard-error values (if they exist) for a phenotype."""
- _sequery = (
- "SELECT pxr.Id AS xref_id, pxr.DataId, str.Id AS StrainId, pse.error, nst.count "
- "FROM Phenotype AS pheno "
- "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
- "INNER JOIN PublishSE AS pse ON pxr.DataId=pse.DataId "
- "INNER JOIN NStrain AS nst ON pse.DataId=nst.DataId "
- "INNER JOIN Strain AS str ON nst.StrainId=str.Id "
- "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
- "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
- "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
- "WHERE (str.SpeciesId, pxr.InbredSetId, pf.Id, pxr.Id)=(%s, %s, %s, %s)")
- cursor.execute(_sequery,
- (species_id, population_id, dataset_id, xref_id))
- return {(row["DataId"], row["StrainId"]): {
- "xref_id": row["xref_id"],
- "DataId": row["DataId"],
- "error": row["error"],
- "count": row["count"]
- } for row in cursor.fetchall()}
+ paramstr = ", ".join(["(%s, %s)"] * len(dataids_and_strainids))
+ flat = tuple(item for sublist in dataids_and_strainids for item in sublist)
+ cursor.execute("SELECT * FROM PublishSE WHERE (DataId, StrainId) IN "
+ f"({paramstr})",
+ flat)
+ debug_query(cursor, app.logger)
+ _se = {
+ (row["DataId"], row["StrainId"]): {
+ "DataId": row["DataId"],
+ "StrainId": row["StrainId"],
+ "error": row["error"]
+ }
+ for row in cursor.fetchall()
+ }
+
+ cursor.execute("SELECT * FROM NStrain WHERE (DataId, StrainId) IN "
+ f"({paramstr})",
+ flat)
+ debug_query(cursor, app.logger)
+ _n = {
+ (row["DataId"], row["StrainId"]): {
+ "DataId": row["DataId"],
+ "StrainId": row["StrainId"],
+ "count": row["count"]
+ }
+ for row in cursor.fetchall()
+ }
+
+ keys = set(tuple(_se.keys()) + tuple(_n.keys()))
+ return {
+ key: {"xref_id": xref_id, **_se.get(key,{}), **_n.get(key,{})}
+ for key in keys
+ }
def __organise_by_phenotype__(pheno, row):
"""Organise disparate data rows into phenotype 'objects'."""
@@ -117,6 +141,7 @@ def __organise_by_phenotype__(pheno, row):
**(_pheno["data"] if bool(_pheno) else {}),
(row["DataId"], row["StrainId"]): {
"DataId": row["DataId"],
+ "StrainId": row["StrainId"],
"mean": row["mean"],
"Locus": row["Locus"],
"LRS": row["LRS"],
@@ -170,11 +195,9 @@ def phenotype_by_id(
**_pheno,
"data": tuple(__merge_pheno_data_and_se__(
_pheno["data"],
- __phenotype_se__(cursor,
- species_id,
- population_id,
- dataset_id,
- xref_id)).values())
+ __phenotype_se__(
+ cursor, xref_id, tuple(_pheno["data"].keys()))
+ ).values())
}
if bool(_pheno) and len(_pheno.keys()) > 1:
raise Exception(
@@ -231,3 +254,32 @@ def save_new_dataset(cursor: Cursor,
params)
debug_query(cursor, app.logger)
return {**params, "Id": cursor.lastrowid}
+
+
+def phenotypes_data_by_ids(
+ conn: mdb.Connection,
+ inbred_pheno_xref: dict[str, int]
+) -> tuple[dict, ...]:
+ """Fetch all phenotype data, filtered by the `inbred_pheno_xref` mapping."""
+ _paramstr = ",".join(["(%s, %s, %s)"] * len(inbred_pheno_xref))
+ _query = ("SELECT "
+ "pub.PubMed_ID, pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode "
+ "FROM Publication AS pub "
+ "RIGHT JOIN PublishXRef AS pxr0 ON pub.Id=pxr0.PublicationId "
+ "INNER JOIN Phenotype AS pheno ON pxr0.PhenotypeId=pheno.id "
+ "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+ "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id "
+ "INNER JOIN Strain AS str ON pd.StrainId=str.Id "
+ "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
+ "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
+ "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
+ f"WHERE (pxr.InbredSetId, pheno.Id, pxr.Id) IN ({_paramstr}) "
+ "ORDER BY pheno.Id")
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(_query, tuple(item for row in inbred_pheno_xref
+ for item in (row["population_id"],
+ row["phenoid"],
+ row["xref_id"])))
+ debug_query(cursor, app.logger)
+ return tuple(
+ reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values())
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index 8ecd305..a18c44d 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -1,16 +1,22 @@
"""Views handling ('classical') phenotypes."""
import sys
+import csv
import uuid
import json
+import logging
import datetime
+import tempfile
+from typing import Any
from pathlib import Path
from zipfile import ZipFile
from functools import wraps, reduce
from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING
from redis import Redis
+from pymonad.either import Left
from requests.models import Response
from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
from gn_libs.mysqldb import database_connection
from flask import (flash,
request,
@@ -18,6 +24,7 @@ from flask import (flash,
jsonify,
redirect,
Blueprint,
+ send_file,
current_app as app)
# from r_qtl import r_qtl2 as rqtl2
@@ -29,12 +36,12 @@ from uploader.files import save_file#, fullpath
from uploader.ui import make_template_renderer
from uploader.oauth2.client import oauth2_post
from uploader.authorisation import require_login
+from uploader.route_utils import generic_select_population
+from uploader.datautils import safe_int, enumerate_sequence
from uploader.species.models import all_species, species_by_id
from uploader.monadic_requests import make_either_error_handler
from uploader.request_checks import with_species, with_population
-from uploader.datautils import safe_int, order_by_family, enumerate_sequence
-from uploader.population.models import (populations_by_species,
- population_by_species_and_id)
+from uploader.samples.models import samples_by_species_and_population
from uploader.input_validation import (encode_errors,
decode_errors,
is_valid_representative_name)
@@ -45,11 +52,15 @@ from .models import (dataset_by_id,
save_new_dataset,
dataset_phenotypes,
datasets_by_population,
+ phenotypes_data_by_ids,
phenotype_publication_data)
phenotypesbp = Blueprint("phenotypes", __name__)
render_template = make_template_renderer("phenotypes")
+_FAMILIES_WITH_SE_AND_N_ = (
+ "Reference Populations (replicate average, SE, N)",)
+
@phenotypesbp.route("/phenotypes", methods=["GET"])
@require_login
def index():
@@ -57,10 +68,16 @@ def index():
with database_connection(app.config["SQL_URI"]) as conn:
if not bool(request.args.get("species_id")):
return render_template("phenotypes/index.html",
- species=order_by_family(all_species(conn)),
+ species=all_species(conn),
activelink="phenotypes")
- species = species_by_id(conn, request.args.get("species_id"))
+ species_id = request.args.get("species_id")
+ if species_id == "CREATE-SPECIES":
+ return redirect(url_for(
+ "species.create_species",
+ return_to="species.populations.phenotypes.select_population"))
+
+ species = species_by_id(conn, species_id)
if not bool(species):
flash("No such species!", "alert-danger")
return redirect(url_for("species.populations.phenotypes.index"))
@@ -74,27 +91,14 @@ def index():
@with_species(redirect_uri="species.populations.phenotypes.index")
def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument]
"""Select the population for your phenotypes."""
- with database_connection(app.config["SQL_URI"]) as conn:
- if not bool(request.args.get("population_id")):
- return render_template("phenotypes/select-population.html",
- species=species,
- populations=order_by_family(
- populations_by_species(
- conn, species["SpeciesId"]),
- order_key="FamilyOrder"),
- activelink="phenotypes")
-
- population = population_by_species_and_id(
- conn, species["SpeciesId"], int(request.args["population_id"]))
- if not bool(population):
- flash("No such population found!", "alert-danger")
- return redirect(url_for(
- "species.populations.phenotypes.select_population",
- species_id=species["SpeciesId"]))
-
- return redirect(url_for("species.populations.phenotypes.list_datasets",
- species_id=species["SpeciesId"],
- population_id=population["Id"]))
+ return generic_select_population(
+ species,
+ "phenotypes/select-population.html",
+ request.args.get("population_id") or "",
+ "species.populations.phenotypes.select_population",
+ "species.populations.phenotypes.list_datasets",
+ "phenotypes",
+ "No such population found!")
@@ -192,12 +196,10 @@ def view_dataset(# pylint: disable=[unused-argument]
phenotype_count=phenotypes_count(
conn, population["Id"], dataset["Id"]),
phenotypes=enumerate_sequence(
- dataset_phenotypes(conn,
- population["Id"],
- dataset["Id"],
- offset=start_at,
- limit=count),
- start=start_at+1),
+ dataset_phenotypes(
+ conn,
+ population["Id"],
+ dataset["Id"])),
start_from=start_at,
count=count,
activelink="view-dataset")
@@ -226,20 +228,25 @@ def view_phenotype(# pylint: disable=[unused-argument]
population["Id"],
dataset["Id"],
xref_id)
+ def __non_empty__(value) -> bool:
+ if isinstance(value, str):
+ return value.strip() != ""
+ return bool(value)
+
return render_template(
"phenotypes/view-phenotype.html",
species=species,
population=population,
dataset=dataset,
+ xref_id=xref_id,
phenotype=phenotype,
- has_se=all(bool(item.get("error")) for item in phenotype["data"]),
+ has_se=any(bool(item.get("error")) for item in phenotype["data"]),
publish_data={
key.replace("_", " "): val
for key,val in
(phenotype_publication_data(conn, phenotype["Id"]) or {}).items()
if (key in ("PubMed_ID", "Authors", "Title", "Journal")
- and val is not None
- and val.strip() is not "")
+ and __non_empty__(val))
},
privileges=(privileges
### For demo! Do not commit this part
@@ -422,8 +429,7 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p
"December"),
current_month=today.strftime("%B"),
current_year=int(today.strftime("%Y")),
- families_with_se_and_n=(
- "Reference Populations (replicate average, SE, N)",),
+ families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
use_bundle=use_bundle,
activelink="add-phenotypes")
@@ -517,7 +523,7 @@ def job_status(
@phenotypesbp.route(
"<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
- "/<int:dataset_id>/review-job/<uuid:job_id>",
+ "/<int:dataset_id>/job/<uuid:job_id>/review",
methods=["GET"])
@require_login
@with_dataset(
@@ -545,11 +551,12 @@ def review_job_data(
filetype: (by_type.get(filetype, tuple())
+ ({"filename": item[0], **item[1]},))
}
- metadata = reduce(__metadata_by_type__,
- (jobs.job_files_metadata(
- rconn, jobs.jobsnamespace(), job['jobid'])
- if job else {}).items(),
- {})
+ metadata: dict[str, Any] = reduce(
+ __metadata_by_type__,
+ (jobs.job_files_metadata(
+ rconn, jobs.jobsnamespace(), job['jobid'])
+ if job else {}).items(),
+ {})
def __desc__(filetype):
match filetype:
@@ -587,3 +594,444 @@ def review_job_data(
job=job,
summary=summary,
activelink="add-phenotypes")
+
+
+def update_phenotype_metadata(conn, metadata: dict):
+ """Update a phenotype's basic metadata values."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute("SELECT * FROM Phenotype WHERE Id=%(phenotype-id)s",
+ metadata)
+ res = {
+ **{
+ _key: _val for _key,_val in {
+ key.lower().replace("_", "-"): value
+ for key, value in (cursor.fetchone() or {}).items()
+ }.items()
+ if _key in metadata.keys()
+ },
+ "phenotype-id": metadata.get("phenotype-id")
+ }
+ if res == metadata:
+ return False
+
+ cursor.execute(
+ "UPDATE Phenotype SET "
+ "Pre_publication_description=%(pre-publication-description)s, "
+ "Post_publication_description=%(post-publication-description)s, "
+ "Original_description=%(original-description)s, "
+ "Units=%(units)s, "
+ "Pre_publication_abbreviation=%(pre-publication-abbreviation)s, "
+ "Post_publication_abbreviation=%(post-publication-abbreviation)s "
+ "WHERE Id=%(phenotype-id)s",
+ metadata)
+ return cursor.rowcount
+
+
+def update_phenotype_values(conn, values):
+ """Update a phenotype's data values."""
+ with conn.cursor() as cursor:
+ cursor.executemany(
+ "UPDATE PublishData SET value=%(new)s "
+ "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in values if item["new"] is not None))
+ cursor.executemany(
+ "DELETE FROM PublishData "
+ "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in values if item["new"] is None))
+ return len(values)
+ return 0
+
+
+def update_phenotype_se(conn, serrs):
+ """Update a phenotype's standard-error values."""
+ with conn.cursor() as cursor:
+ cursor.executemany(
+ "INSERT INTO PublishSE(DataId, StrainId, error) "
+ "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
+ "ON DUPLICATE KEY UPDATE error=VALUES(error)",
+ tuple(item for item in serrs if item["new"] is not None))
+ cursor.executemany(
+ "DELETE FROM PublishSE "
+ "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in serrs if item["new"] is None))
+ return len(serrs)
+ return 0
+
+
+def update_phenotype_n(conn, counts):
+ """Update a phenotype's strain counts."""
+ with conn.cursor() as cursor:
+ cursor.executemany(
+ "INSERT INTO NStrain(DataId, StrainId, count) "
+ "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
+ "ON DUPLICATE KEY UPDATE count=VALUES(count)",
+ tuple(item for item in counts if item["new"] is not None))
+ cursor.executemany(
+ "DELETE FROM NStrain "
+ "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
+ tuple(item for item in counts if item["new"] is None))
+ return len(counts)
+
+ return 0
+
+
+def update_phenotype_data(conn, data: dict):
+ """Update the numeric data for a phenotype."""
+ def __organise_by_dataid_and_strainid__(acc, current):
+ _key, dataid, strainid = current[0].split("::")
+ _keysrc, _keytype = _key.split("-")
+ newkey = f"{dataid}::{strainid}"
+ newitem = acc.get(newkey, {})
+ newitem[_keysrc] = newitem.get(_keysrc, {})
+ newitem[_keysrc][_keytype] = current[1]
+ return {**acc, newkey: newitem}
+
+ def __separate_items__(acc, row):
+ key, val = row
+ return ({
+ **acc[0],
+ key: {
+ **val["value"],
+ "changed?": (not val["value"]["new"] == val["value"]["original"])
+ }
+ }, {
+ **acc[1],
+ key: {
+ **val["se"],
+ "changed?": (not val["se"]["new"] == val["se"]["original"])
+ }
+ },{
+ **acc[2],
+ key: {
+ **val["n"],
+ "changed?": (not val["n"]["new"] == val["n"]["original"])
+ }
+ })
+
+ values, serrs, counts = tuple(
+ tuple({
+ "data_id": row[0].split("::")[0],
+ "strain_id": row[0].split("::")[1],
+ "new": row[1]["new"]
+ } for row in item)
+ for item in (
+ filter(lambda val: val[1]["changed?"], item.items())# type: ignore[arg-type]
+ for item in reduce(# type: ignore[var-annotated]
+ __separate_items__,
+ reduce(__organise_by_dataid_and_strainid__,
+ data.items(),
+ {}).items(),
+ ({}, {}, {}))))
+
+ return (update_phenotype_values(conn, values),
+ update_phenotype_se(conn, serrs),
+ update_phenotype_n(conn, counts))
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/phenotype/<int:xref_id>/edit",
+ methods=["GET", "POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def edit_phenotype_data(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ xref_id: int,
+ **kwargs
+):
+ """Edit the data for a particular phenotype."""
+ def __render__(**kwargs):
+ processed_kwargs = {
+ **kwargs,
+ "privileges": (kwargs.get("privileges", tuple())
+ ### For demo! Do not commit this part
+ + ("group:resource:edit-resource",
+ "group:resource:delete-resource",)
+ ### END: For demo! Do not commit this part
+ )
+ }
+ return render_template(
+ "phenotypes/edit-phenotype.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ xref_id=xref_id,
+ families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
+ **processed_kwargs,
+ activelink="edit-phenotype")
+
+ with database_connection(app.config["SQL_URI"]) as conn:
+ if request.method == "GET":
+ def __fetch_phenotype__(privileges):
+ phenotype = phenotype_by_id(conn,
+ species["SpeciesId"],
+ population["Id"],
+ dataset["Id"],
+ xref_id)
+ if phenotype is None:
+ msg = ("Could not find the phenotype with cross-reference ID"
+ f" '{xref_id}' from dataset '{dataset['FullName']}' "
+ f" from the '{population['FullName']}' population of "
+ f" species '{species['FullName']}'.")
+ return Left({"privileges": privileges, "phenotype-error": msg})
+ return {"privileges": privileges, "phenotype": phenotype}
+
+ def __fetch_publication_data__(**kwargs):
+ pheno = kwargs["phenotype"]
+ return {
+ **kwargs,
+ "publication_data": phenotype_publication_data(
+ conn, pheno["Id"])
+ }
+
+ def __fail__(failure_object):
+ # process the object
+ return __render__(failure_object=failure_object)
+
+ return oauth2_post(
+ "/auth/resource/phenotypes/individual/linked-resource",
+ json={
+ "species_id": species["SpeciesId"],
+ "population_id": population["Id"],
+ "dataset_id": dataset["Id"],
+ "xref_id": xref_id
+ }
+ ).then(
+ lambda resource: tuple(
+ privilege["privilege_id"] for role in resource["roles"]
+ for privilege in role["privileges"])
+ ).then(
+ __fetch_phenotype__
+ ).then(
+ lambda args: __fetch_publication_data__(**args)
+ ).either(__fail__, lambda args: __render__(**args))
+
+ ## POST
+ _change = False
+ match request.form.get("submit", "invalid-action"):
+ case "update basic metadata":
+ _change = update_phenotype_metadata(conn, {
+ key: value.strip() if bool(value.strip()) else None
+ for key, value in request.form.items()
+ if key not in ("submit",)
+ })
+ msg = "Basic metadata was updated successfully."
+ case "update data":
+ _update = update_phenotype_data(conn, {
+ key: value.strip() if bool(value.strip()) else None
+ for key, value in request.form.items()
+ if key not in ("submit",)
+ })
+ msg = (f"{_update[0]} value rows, {_update[1]} standard-error "
+ f"rows and {_update[2]} 'N' rows were updated.")
+ _change = any(item != 0 for item in _update)
+ case "update publication":
+ flash("NOT IMPLEMENTED: Would update publication data.", "alert-success")
+ case _:
+ flash("Invalid phenotype editing action.", "alert-danger")
+
+ if _change:
+ flash(msg, "alert-success")
+ return redirect(url_for(
+ "species.populations.phenotypes.view_phenotype",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=dataset["Id"],
+ xref_id=xref_id))
+
+ flash("No change was made by the user.", "alert-info")
+ return redirect(url_for(
+ "species.populations.phenotypes.edit_phenotype_data",
+ species_id=species["SpeciesId"],
+ population_id=population["Id"],
+ dataset_id=dataset["Id"],
+ xref_id=xref_id))
+
+
+def process_phenotype_data_for_download(pheno: dict) -> dict:
+ """Sanitise data for download."""
+ return {
+ "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}",
+ **{
+ key: val for key, val in pheno.items()
+ if key not in ("Id", "xref_id", "data", "Units")
+ },
+ **{
+ data_item["StrainName"]: data_item["value"]
+ for data_item in pheno.get("data", {}).values()
+ }
+ }
+
+
+BULK_EDIT_COMMON_FIELDNAMES = [
+ "UniqueIdentifier",
+ "Post_publication_description",
+ "Pre_publication_abbreviation",
+ "Pre_publication_description",
+ "Original_description",
+ "Post_publication_abbreviation",
+ "PubMed_ID"
+]
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/edit-download",
+ methods=["POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def edit_download_phenotype_data(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ **kwargs
+):
+ formdata = request.json
+ with database_connection(app.config["SQL_URI"]) as conn:
+ samples_list = [
+ sample["Name"] for sample in samples_by_species_and_population(
+ conn, species["SpeciesId"], population["Id"])]
+ data = (
+ process_phenotype_data_for_download(pheno)
+ for pheno in phenotypes_data_by_ids(conn, tuple({
+ "population_id": population["Id"],
+ "phenoid": row["phenotype_id"],
+ "xref_id": row["xref_id"]
+ } for row in formdata)))
+
+ with (tempfile.TemporaryDirectory(
+ prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir):
+ filename = Path(tmpdir).joinpath("tempfile.tsv")
+ with open(filename, mode="w") as outfile:
+ outfile.write(
+ "# **DO NOT** delete the 'UniqueIdentifier' row. It is used "
+ "by the system to identify and edit the correct rows and "
+ "columns in the database.\n")
+ outfile.write(
+ "# The '…_description' fields are useful for you to figure out "
+ "what row you are working on. Changing any of this fields will "
+ "also update the database, so do be careful.\n")
+ outfile.write(
+ "# Leave a field empty to delete the value in the database.\n")
+ outfile.write(
+ "# Any line beginning with a '#' character is considered a "
+ "comment line. This line, and all the lines above it, are "
+ "all comment lines. Comment lines will be ignored.\n")
+ writer = csv.DictWriter(outfile,
+ fieldnames= (
+ BULK_EDIT_COMMON_FIELDNAMES +
+ samples_list),
+ dialect="excel-tab")
+ writer.writeheader()
+ writer.writerows(data)
+ outfile.flush()
+
+ return send_file(
+ filename,
+ mimetype="text/csv",
+ as_attachment=True,
+ download_name=secure_filename(f"{dataset['Name']}_data"))
+
+
+@phenotypesbp.route(
+ "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+ "/<int:dataset_id>/edit-upload",
+ methods=["GET", "POST"])
+@require_login
+@with_dataset(
+ species_redirect_uri="species.populations.phenotypes.index",
+ population_redirect_uri="species.populations.phenotypes.select_population",
+ redirect_uri="species.populations.phenotypes.list_datasets")
+def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
+ species: dict,
+ population: dict,
+ dataset: dict,
+ **kwargs
+):
+ if request.method == "GET":
+ return render_template(
+ "phenotypes/bulk-edit-upload.html",
+ species=species,
+ population=population,
+ dataset=dataset,
+ activelink="edit-phenotype")
+
+ edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
+ Path(app.config["UPLOAD_FOLDER"]))
+
+ from gn_libs import jobs as gnlibs_jobs
+ from gn_libs import sqlite3
+ jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+ with sqlite3.connection(jobs_db) as conn:
+ job_id = uuid.uuid4()
+ job_cmd = [
+ sys.executable, "-u",
+ "-m", "scripts.phenotypes_bulk_edit",
+ app.config["SQL_URI"],
+ jobs_db,
+ str(job_id),
+ "--log-level",
+ logging.getLevelName(
+ app.logger.getEffectiveLevel()
+ ).lower()
+ ]
+ app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
+ _job = gnlibs_jobs.launch_job(
+ gnlibs_jobs.initialise_job(conn,
+ job_id,
+ job_cmd,
+ "phenotype-bulk-edit",
+ extra_meta = {
+ "edit-file": str(edit_file),
+ "species-id": species["SpeciesId"],
+ "population-id": population["Id"],
+ "dataset-id": dataset["Id"]
+ }),
+ jobs_db,
+ f"{app.config['UPLOAD_FOLDER']}/job_errors",
+ worker_manager="gn_libs.jobs.launcher")
+
+
+ return """
+ <p>The following steps need to be performed:
+ <ol>
+ <li>Check that all IDs exist</li>
+ <li>Check for mandatory values</li>
+ <li>Update descriptions in the database (where changed)</li>
+ <li>Update publications in the database (where changed):
+ <ol>
+ <li>If <strong>PubMed_ID</strong> exists in our database, simply update the
+ 'PublicationId' value in the 'PublishXRef' table.</li>
+  <li>If <strong>PubMed_ID</strong> does not exist in our database:
+ <ol>
+ <li>fetch the publication's details from PubMed using the new
+ <strong>PubMed_ID</strong> value.</li>
+ <li>create a new publication in our database using the fetched data</li>
+ <li>Update 'PublicationId' value in 'PublishXRef' with ID of newly created
+ publication</li>
+ </ol>
+ </ol>
+ </li>
+ <li>Update values in the database (where changed)</li>
+ </ol>
+ </p>
+
+ <p><strong>Note:</strong>
+ <ul>
+ <li>If a strain that did not have a value is given a value, then we need to
+ add a new cross-reference for the new DataId created.</li>
+ <li>If a strain that had a value has its value deleted and left blank, we
+ need to remove the cross-reference for the existing DataId — or, should we
+ enter the NULL value instead? Removing the cross-reference might be more
+ trouble than it is worth.</li>
+ </ul>
+ </p>
+ """