aboutsummaryrefslogtreecommitdiff
path: root/uploader/phenotypes/views.py
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/phenotypes/views.py')
-rw-r--r-- uploader/phenotypes/views.py 235
1 files changed, 209 insertions, 26 deletions
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index ddec54c..a18c44d 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -1,8 +1,11 @@
"""Views handling ('classical') phenotypes."""
import sys
+import csv
import uuid
import json
+import logging
import datetime
+import tempfile
from typing import Any
from pathlib import Path
from zipfile import ZipFile
@@ -13,6 +16,7 @@ from redis import Redis
from pymonad.either import Left
from requests.models import Response
from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
from gn_libs.mysqldb import database_connection
from flask import (flash,
request,
@@ -20,6 +24,7 @@ from flask import (flash,
jsonify,
redirect,
Blueprint,
+ send_file,
current_app as app)
# from r_qtl import r_qtl2 as rqtl2
@@ -31,12 +36,12 @@ from uploader.files import save_file#, fullpath
from uploader.ui import make_template_renderer
from uploader.oauth2.client import oauth2_post
from uploader.authorisation import require_login
+from uploader.route_utils import generic_select_population
+from uploader.datautils import safe_int, enumerate_sequence
from uploader.species.models import all_species, species_by_id
from uploader.monadic_requests import make_either_error_handler
from uploader.request_checks import with_species, with_population
-from uploader.datautils import safe_int, order_by_family, enumerate_sequence
-from uploader.population.models import (populations_by_species,
- population_by_species_and_id)
+from uploader.samples.models import samples_by_species_and_population
from uploader.input_validation import (encode_errors,
decode_errors,
is_valid_representative_name)
@@ -47,6 +52,7 @@ from .models import (dataset_by_id,
save_new_dataset,
dataset_phenotypes,
datasets_by_population,
+ phenotypes_data_by_ids,
phenotype_publication_data)
phenotypesbp = Blueprint("phenotypes", __name__)
@@ -62,10 +68,16 @@ def index():
with database_connection(app.config["SQL_URI"]) as conn:
if not bool(request.args.get("species_id")):
return render_template("phenotypes/index.html",
- species=order_by_family(all_species(conn)),
+ species=all_species(conn),
activelink="phenotypes")
- species = species_by_id(conn, request.args.get("species_id"))
+ species_id = request.args.get("species_id")
+ if species_id == "CREATE-SPECIES":
+ return redirect(url_for(
+ "species.create_species",
+ return_to="species.populations.phenotypes.select_population"))
+
+ species = species_by_id(conn, species_id)
if not bool(species):
flash("No such species!", "alert-danger")
return redirect(url_for("species.populations.phenotypes.index"))
@@ -79,27 +91,14 @@ def index():
@with_species(redirect_uri="species.populations.phenotypes.index")
def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument]
"""Select the population for your phenotypes."""
- with database_connection(app.config["SQL_URI"]) as conn:
- if not bool(request.args.get("population_id")):
- return render_template("phenotypes/select-population.html",
- species=species,
- populations=order_by_family(
- populations_by_species(
- conn, species["SpeciesId"]),
- order_key="FamilyOrder"),
- activelink="phenotypes")
-
- population = population_by_species_and_id(
- conn, species["SpeciesId"], int(request.args["population_id"]))
- if not bool(population):
- flash("No such population found!", "alert-danger")
- return redirect(url_for(
- "species.populations.phenotypes.select_population",
- species_id=species["SpeciesId"]))
-
- return redirect(url_for("species.populations.phenotypes.list_datasets",
- species_id=species["SpeciesId"],
- population_id=population["Id"]))
+ return generic_select_population(
+ species,
+ "phenotypes/select-population.html",
+ request.args.get("population_id") or "",
+ "species.populations.phenotypes.select_population",
+ "species.populations.phenotypes.list_datasets",
+ "phenotypes",
+ "No such population found!")
@@ -852,3 +851,187 @@ def edit_phenotype_data(# pylint: disable=[unused-argument]
population_id=population["Id"],
dataset_id=dataset["Id"],
xref_id=xref_id))
+
+
def process_phenotype_data_for_download(pheno: dict) -> dict:
    """Flatten one phenotype record into a single row for the download file.

    The row begins with a 'UniqueIdentifier' built from the record's 'Id' and
    'xref_id'. It is followed by every other top-level field except 'Id',
    'xref_id', 'data' and 'Units', and then by one entry per item in
    `pheno["data"]`, keyed by that item's 'StrainName' and holding its
    'value'. A record without a 'data' key simply gets no strain entries.
    """
    excluded = ("Id", "xref_id", "data", "Units")
    row = {"UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}"}

    for field, value in pheno.items():
        if field not in excluded:
            row[field] = value

    # Strain values go in last, so a strain named like an existing field
    # overrides that field's value.
    for measurement in pheno.get("data", {}).values():
        row[measurement["StrainName"]] = measurement["value"]

    return row
+
+
# Header fields that start every bulk-edit download file. Kept as a list
# because the download view concatenates the population's sample names
# onto it to form the full set of column names.
BULK_EDIT_COMMON_FIELDNAMES = [
    "UniqueIdentifier",
    "Post_publication_description",
    "Pre_publication_abbreviation",
    "Pre_publication_description",
    "Original_description",
    "Post_publication_abbreviation",
    "PubMed_ID",
]
+
+
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/edit-download",
    methods=["POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def edit_download_phenotype_data(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):
    """Send a tab-separated file of the selected phenotypes for bulk editing.

    The request body must be JSON: an iterable of objects, each providing a
    'phenotype_id' and an 'xref_id'. The file starts with '#'-prefixed
    comment lines explaining how to edit it, followed by a header made up of
    `BULK_EDIT_COMMON_FIELDNAMES` plus the population's sample names, then
    one row per phenotype.

    Raises whatever `csv.DictWriter` raises (a `ValueError`) if a row carries
    a key that is not one of the header fields.
    """
    formdata = request.json
    with database_connection(app.config["SQL_URI"]) as conn:
        samples_list = [
            sample["Name"] for sample in samples_by_species_and_population(
                conn, species["SpeciesId"], population["Id"])]
        # Build every row while the connection is still open. A lazy
        # generator would only be consumed by `writerows` below, after the
        # connection context has exited.
        data = tuple(
            process_phenotype_data_for_download(pheno)
            for pheno in phenotypes_data_by_ids(conn, tuple({
                "population_id": population["Id"],
                "phenoid": row["phenotype_id"],
                "xref_id": row["xref_id"]
            } for row in formdata)))

    # NOTE(review): `prefix` is a name prefix, not a parent directory, so the
    # directory is created with a name that *starts with* the configured
    # value. If TEMPORARY_DIRECTORY is meant to be the parent directory,
    # `dir=` is the argument for that -- confirm before changing.
    with tempfile.TemporaryDirectory(
            prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir:
        filename = Path(tmpdir).joinpath("tempfile.tsv")
        # Explicit UTF-8: the comment header contains a non-ASCII '…', which
        # would fail to encode under a non-UTF-8 default locale.
        # newline="": the csv module writes its own line terminators and
        # requires that the file object does not translate them.
        with open(filename,
                  mode="w",
                  encoding="utf-8",
                  newline="") as outfile:
            outfile.write(
                "# **DO NOT** delete the 'UniqueIdentifier' row. It is used "
                "by the system to identify and edit the correct rows and "
                "columns in the database.\n")
            outfile.write(
                "# The '…_description' fields are useful for you to figure out "
                "what row you are working on. Changing any of this fields will "
                "also update the database, so do be careful.\n")
            outfile.write(
                "# Leave a field empty to delete the value in the database.\n")
            outfile.write(
                "# Any line beginning with a '#' character is considered a "
                "comment line. This line, and all the lines above it, are "
                "all comment lines. Comment lines will be ignored.\n")
            writer = csv.DictWriter(
                outfile,
                fieldnames=BULK_EDIT_COMMON_FIELDNAMES + samples_list,
                dialect="excel-tab")
            writer.writeheader()
            writer.writerows(data)
            outfile.flush()

        # Must run before the temporary directory context exits, since the
        # directory and the file in it are removed at that point.
        return send_file(
            filename,
            mimetype="text/csv",
            as_attachment=True,
            download_name=secure_filename(f"{dataset['Name']}_data"))
+
+
@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/edit-upload",
    methods=["GET", "POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):
    """Show the bulk-edit upload form, or accept an uploaded bulk-edit file.

    GET renders the 'phenotypes/bulk-edit-upload.html' template. POST saves
    the file sent in the 'file-upload-bulk-edit-upload' form field into the
    configured upload folder, registers and launches a
    'phenotype-bulk-edit' job running `scripts.phenotypes_bulk_edit`, and
    returns a static HTML outline of the steps the edit involves.
    """
    if request.method == "GET":
        return render_template(
            "phenotypes/bulk-edit-upload.html",
            species=species,
            population=population,
            dataset=dataset,
            activelink="edit-phenotype")

    upload_dir = Path(app.config["UPLOAD_FOLDER"])
    edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
                          upload_dir)

    from gn_libs import jobs as gnlibs_jobs
    from gn_libs import sqlite3
    jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
    with sqlite3.connection(jobs_db) as jobsconn:
        job_id = uuid.uuid4()
        # The script is told to log at the same level the application uses.
        loglevel = logging.getLevelName(
            app.logger.getEffectiveLevel()).lower()
        job_cmd = [
            sys.executable, "-u",
            "-m", "scripts.phenotypes_bulk_edit",
            app.config["SQL_URI"],
            jobs_db,
            str(job_id),
            "--log-level",
            loglevel
        ]
        app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
        # Details stored alongside the job record.
        job_metadata = {
            "edit-file": str(edit_file),
            "species-id": species["SpeciesId"],
            "population-id": population["Id"],
            "dataset-id": dataset["Id"]
        }
        new_job = gnlibs_jobs.initialise_job(jobsconn,
                                             job_id,
                                             job_cmd,
                                             "phenotype-bulk-edit",
                                             extra_meta=job_metadata)
        gnlibs_jobs.launch_job(
            new_job,
            jobs_db,
            f"{app.config['UPLOAD_FOLDER']}/job_errors",
            worker_manager="gn_libs.jobs.launcher")

    # Static placeholder page: it lists the processing steps rather than
    # reporting on the job that was just launched.
    return """
    <p>The following steps need to be performed:
    <ol>
      <li>Check that all IDs exist</li>
      <li>Check for mandatory values</li>
      <li>Update descriptions in the database (where changed)</li>
      <li>Update publications in the database (where changed):
        <ol>
          <li>If <strong>PubMed_ID</strong> exists in our database, simply update the
          'PublicationId' value in the 'PublishXRef' table.</li>
          <li>If <strong>PubMed_ID</strong> does not exists in our database:
            <ol>
              <li>fetch the publication's details from PubMed using the new
              <strong>PubMed_ID</strong> value.</li>
              <li>create a new publication in our database using the fetched data</li>
              <li>Update 'PublicationId' value in 'PublishXRef' with ID of newly created
              publication</li>
            </ol>
        </ol>
      </li>
      <li>Update values in the database (where changed)</li>
    </ol>
    </p>

    <p><strong>Note:</strong>
    <ul>
      <li>If a strain that did not have a value is given a value, then we need to
      add a new cross-reference for the new DataId created.</li>
      <li>If a strain that had a value has its value deleted and left blank, we
      need to remove the cross-reference for the existing DataId — or, should we
      enter the NULL value instead? Removing the cross-reference might be more
      trouble than it is worth.</li>
    </ul>
    </p>
    """