diff options
Diffstat (limited to 'uploader/phenotypes/views.py')
-rw-r--r-- | uploader/phenotypes/views.py | 235 |
1 files changed, 209 insertions, 26 deletions
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py index ddec54c..a18c44d 100644 --- a/uploader/phenotypes/views.py +++ b/uploader/phenotypes/views.py @@ -1,8 +1,11 @@ """Views handling ('classical') phenotypes.""" import sys +import csv import uuid import json +import logging import datetime +import tempfile from typing import Any from pathlib import Path from zipfile import ZipFile @@ -13,6 +16,7 @@ from redis import Redis from pymonad.either import Left from requests.models import Response from MySQLdb.cursors import DictCursor +from werkzeug.utils import secure_filename from gn_libs.mysqldb import database_connection from flask import (flash, request, @@ -20,6 +24,7 @@ from flask import (flash, jsonify, redirect, Blueprint, + send_file, current_app as app) # from r_qtl import r_qtl2 as rqtl2 @@ -31,12 +36,12 @@ from uploader.files import save_file#, fullpath from uploader.ui import make_template_renderer from uploader.oauth2.client import oauth2_post from uploader.authorisation import require_login +from uploader.route_utils import generic_select_population +from uploader.datautils import safe_int, enumerate_sequence from uploader.species.models import all_species, species_by_id from uploader.monadic_requests import make_either_error_handler from uploader.request_checks import with_species, with_population -from uploader.datautils import safe_int, order_by_family, enumerate_sequence -from uploader.population.models import (populations_by_species, - population_by_species_and_id) +from uploader.samples.models import samples_by_species_and_population from uploader.input_validation import (encode_errors, decode_errors, is_valid_representative_name) @@ -47,6 +52,7 @@ from .models import (dataset_by_id, save_new_dataset, dataset_phenotypes, datasets_by_population, + phenotypes_data_by_ids, phenotype_publication_data) phenotypesbp = Blueprint("phenotypes", __name__) @@ -62,10 +68,16 @@ def index(): with database_connection(app.config["SQL_URI"]) 
as conn: if not bool(request.args.get("species_id")): return render_template("phenotypes/index.html", - species=order_by_family(all_species(conn)), + species=all_species(conn), activelink="phenotypes") - species = species_by_id(conn, request.args.get("species_id")) + species_id = request.args.get("species_id") + if species_id == "CREATE-SPECIES": + return redirect(url_for( + "species.create_species", + return_to="species.populations.phenotypes.select_population")) + + species = species_by_id(conn, species_id) if not bool(species): flash("No such species!", "alert-danger") return redirect(url_for("species.populations.phenotypes.index")) @@ -79,27 +91,14 @@ def index(): @with_species(redirect_uri="species.populations.phenotypes.index") def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument] """Select the population for your phenotypes.""" - with database_connection(app.config["SQL_URI"]) as conn: - if not bool(request.args.get("population_id")): - return render_template("phenotypes/select-population.html", - species=species, - populations=order_by_family( - populations_by_species( - conn, species["SpeciesId"]), - order_key="FamilyOrder"), - activelink="phenotypes") - - population = population_by_species_and_id( - conn, species["SpeciesId"], int(request.args["population_id"])) - if not bool(population): - flash("No such population found!", "alert-danger") - return redirect(url_for( - "species.populations.phenotypes.select_population", - species_id=species["SpeciesId"])) - - return redirect(url_for("species.populations.phenotypes.list_datasets", - species_id=species["SpeciesId"], - population_id=population["Id"])) + return generic_select_population( + species, + "phenotypes/select-population.html", + request.args.get("population_id") or "", + "species.populations.phenotypes.select_population", + "species.populations.phenotypes.list_datasets", + "phenotypes", + "No such population found!") @@ -852,3 +851,187 @@ def edit_phenotype_data(# 
# NOTE: In the diff this was taken from, the definitions below are preceded by
# the closing context lines of `edit_phenotype_data` (not fully visible here):
#     population_id=population["Id"], dataset_id=dataset["Id"], xref_id=xref_id))


def process_phenotype_data_for_download(pheno: dict) -> dict:
    """Flatten one phenotype record into a single row for the download file.

    The resulting row contains:

    * a `UniqueIdentifier` built from the record's `Id` and `xref_id`,
    * every other top-level item of `pheno`, except `Id`, `xref_id`, `data`
      and `Units`,
    * one item per entry in `pheno["data"]`, keyed by that entry's
      `StrainName` and holding its `value`.

    A missing (or `None`) `data` item simply contributes no strain columns.
    """
    excluded_keys = ("Id", "xref_id", "data", "Units")
    return {
        "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}",
        **{
            key: val for key, val in pheno.items()
            if key not in excluded_keys
        },
        **{
            data_item["StrainName"]: data_item["value"]
            # `or {}` guards against the key being present but set to None.
            for data_item in (pheno.get("data") or {}).values()
        }
    }


# Leading columns of the bulk-edit file; the sample names are appended to
# these to form the full header. Kept as a list because it is concatenated
# with a list of sample names.
BULK_EDIT_COMMON_FIELDNAMES = [
    "UniqueIdentifier",
    "Post_publication_description",
    "Pre_publication_abbreviation",
    "Pre_publication_description",
    "Original_description",
    "Post_publication_abbreviation",
    "PubMed_ID"
]


@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/edit-download",
    methods=["POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def edit_download_phenotype_data(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):
    """Send the selected phenotypes as a tab-separated file for bulk editing.

    The JSON request body is iterated as a sequence of objects, each of which
    must provide `phenotype_id` and `xref_id`. The matching phenotypes are
    fetched with `phenotypes_data_by_ids`, flattened with
    `process_phenotype_data_for_download` and written, after a block of `#`
    comment lines carrying editing instructions, under the columns
    `BULK_EDIT_COMMON_FIELDNAMES` followed by the population's sample names.
    """
    formdata = request.json
    with database_connection(app.config["SQL_URI"]) as conn:
        samples_list = [
            sample["Name"] for sample in samples_by_species_and_population(
                conn, species["SpeciesId"], population["Id"])]
        # Materialise the rows while the connection is still open: a lazy
        # generator could otherwise end up reading from the database only
        # when the file is written, after the connection has been released.
        data = [
            process_phenotype_data_for_download(pheno)
            for pheno in phenotypes_data_by_ids(conn, tuple({
                "population_id": population["Id"],
                "phenoid": row["phenotype_id"],
                "xref_id": row["xref_id"]
            } for row in formdata))]

    # NOTE(review): `prefix=` only sets the start of the generated directory
    # name. If TEMPORARY_DIRECTORY is meant to be the parent directory, this
    # should presumably be `dir=` instead -- confirm against the config.
    with tempfile.TemporaryDirectory(
            prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir:
        filename = Path(tmpdir).joinpath("tempfile.tsv")
        # `newline=""` is what the csv module expects of files it writes to
        # (the dialect supplies its own line terminator). The explicit
        # encoding is needed because the header text below is not pure ASCII.
        with open(filename, mode="w", encoding="utf-8", newline="") as outfile:
            outfile.write(
                "# **DO NOT** delete the 'UniqueIdentifier' row. It is used "
                "by the system to identify and edit the correct rows and "
                "columns in the database.\n")
            outfile.write(
                "# The '…_description' fields are useful for you to figure out "
                "what row you are working on. Changing any of this fields will "
                "also update the database, so do be careful.\n")
            outfile.write(
                "# Leave a field empty to delete the value in the database.\n")
            outfile.write(
                "# Any line beginning with a '#' character is considered a "
                "comment line. This line, and all the lines above it, are "
                "all comment lines. Comment lines will be ignored.\n")
            # NOTE(review): DictWriter raises ValueError for any row key that
            # is not in `fieldnames` -- verify that the phenotype records only
            # carry the common fields plus known sample names.
            writer = csv.DictWriter(
                outfile,
                fieldnames=BULK_EDIT_COMMON_FIELDNAMES + samples_list,
                dialect="excel-tab")
            writer.writeheader()
            writer.writerows(data)

        # The file is closed (and therefore fully flushed) at this point.
        # The response has to be created before the temporary directory is
        # cleaned up, hence the `return` inside the `with` block.
        return send_file(
            filename,
            mimetype="text/csv",
            as_attachment=True,
            download_name=secure_filename(f"{dataset['Name']}_data"))


@phenotypesbp.route(
    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
    "/<int:dataset_id>/edit-upload",
    methods=["GET", "POST"])
@require_login
@with_dataset(
    species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
    redirect_uri="species.populations.phenotypes.list_datasets")
def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
        species: dict,
        population: dict,
        dataset: dict,
        **kwargs
):
    """Accept an edited bulk-edit file and hand it over to a background job.

    On GET, render the upload form. On POST, save the file uploaded in the
    `file-upload-bulk-edit-upload` form field into `UPLOAD_FOLDER`, register
    a `phenotype-bulk-edit` job (running `scripts.phenotypes_bulk_edit`) in
    the asynchronous-jobs SQLite database, launch it, and return a
    placeholder page describing the steps the job is expected to perform.
    """
    if request.method == "GET":
        return render_template(
            "phenotypes/bulk-edit-upload.html",
            species=species,
            population=population,
            dataset=dataset,
            activelink="edit-phenotype")

    edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
                          Path(app.config["UPLOAD_FOLDER"]))

    # Imported locally, as in the original code. Note that this `sqlite3` is
    # the `gn_libs` module, not the standard-library one.
    from gn_libs import jobs as gnlibs_jobs
    from gn_libs import sqlite3
    jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
    with sqlite3.connection(jobs_db) as conn:
        job_id = uuid.uuid4()
        job_cmd = [
            sys.executable, "-u",
            "-m", "scripts.phenotypes_bulk_edit",
            app.config["SQL_URI"],
            jobs_db,
            str(job_id),
            "--log-level",
            logging.getLevelName(
                app.logger.getEffectiveLevel()
            ).lower()
        ]
        # NOTE(review): `job_cmd` includes SQL_URI, so the URI ends up in the
        # debug log and on the job's command line -- confirm that this is
        # acceptable if the URI embeds credentials.
        app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
        gnlibs_jobs.launch_job(
            gnlibs_jobs.initialise_job(conn,
                                       job_id,
                                       job_cmd,
                                       "phenotype-bulk-edit",
                                       extra_meta={
                                           "edit-file": str(edit_file),
                                           "species-id": species["SpeciesId"],
                                           "population-id": population["Id"],
                                           "dataset-id": dataset["Id"]
                                       }),
            jobs_db,
            f"{app.config['UPLOAD_FOLDER']}/job_errors",
            worker_manager="gn_libs.jobs.launcher")

    # Placeholder response listing what the bulk-edit job needs to do.
    return """
    <p>The following steps need to be performed:
      <ol>
        <li>Check that all IDs exist</li>
        <li>Check for mandatory values</li>
        <li>Update descriptions in the database (where changed)</li>
        <li>Update publications in the database (where changed):
          <ol>
            <li>If <strong>PubMed_ID</strong> exists in our database, simply update the
              'PublicationId' value in the 'PublishXRef' table.</li>
            <li>If <strong>PubMed_ID</strong> does not exists in our database:
              <ol>
                <li>fetch the publication's details from PubMed using the new
                  <strong>PubMed_ID</strong> value.</li>
                <li>create a new publication in our database using the fetched data</li>
                <li>Update 'PublicationId' value in 'PublishXRef' with ID of newly created
                  publication</li>
              </ol>
          </ol>
        </li>
        <li>Update values in the database (where changed)</li>
      </ol>
    </p>

    <p><strong>Note:</strong>
      <ul>
        <li>If a strain that did not have a value is given a value, then we need to
          add a new cross-reference for the new DataId created.</li>
        <li>If a strain that had a value has its value deleted and left blank, we
          need to remove the cross-reference for the existing DataId — or, should we
          enter the NULL value instead? Removing the cross-reference might be more
          trouble than it is worth.</li>
      </ul>
    </p>
    """