diff options
Diffstat (limited to 'uploader/phenotypes/views.py')
| -rw-r--r-- | uploader/phenotypes/views.py | 440 |
1 files changed, 214 insertions, 226 deletions
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py index 6bc7471..2afd8a3 100644 --- a/uploader/phenotypes/views.py +++ b/uploader/phenotypes/views.py @@ -1,61 +1,53 @@ -"""Views handling ('classical') phenotypes.""" +"""Views handling ('classical') phenotypes."""# pylint: disable=[too-many-lines] import sys -import csv import uuid import json import logging -import tempfile from typing import Any from pathlib import Path from zipfile import ZipFile -from urllib.parse import urljoin from functools import wraps, reduce -from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING +from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode import datetime -from datetime import timedelta from redis import Redis from pymonad.either import Left from requests.models import Response from MySQLdb.cursors import DictCursor -from werkzeug.utils import secure_filename from gn_libs import sqlite3 from gn_libs import jobs as gnlibs_jobs from gn_libs.jobs.jobs import JobNotFound from gn_libs.mysqldb import database_connection -from gn_libs import monadic_requests as mrequests -from authlib.jose import jwt from flask import (flash, request, - url_for, jsonify, redirect, Blueprint, - send_file, current_app as app) -# from r_qtl import r_qtl2 as rqtl2 from r_qtl import r_qtl2_qc as rqc from r_qtl import exceptions as rqe from uploader import jobs from uploader import session -from uploader.files import save_file#, fullpath +from uploader.files import save_file +from uploader.flask_extensions import url_for from uploader.ui import make_template_renderer from uploader.oauth2.client import oauth2_post +from uploader.oauth2.tokens import request_token from uploader.authorisation import require_login -from uploader.oauth2 import jwks, client as oauth2client +from uploader.oauth2 import client as oauth2client +from uploader.route_utils import build_next_argument from uploader.route_utils import generic_select_population from uploader.datautils import safe_int, enumerate_sequence from uploader.species.models import all_species, species_by_id from uploader.monadic_requests import make_either_error_handler from uploader.publications.models import fetch_publication_by_id from uploader.request_checks import with_species, with_population -from uploader.samples.models import samples_by_species_and_population from uploader.input_validation import (encode_errors, decode_errors, is_valid_representative_name) @@ -66,9 +58,9 @@ from .models import (dataset_by_id, save_new_dataset, dataset_phenotypes, datasets_by_population, - phenotypes_data_by_ids, phenotype_publication_data) +logger = logging.getLogger(__name__) phenotypesbp = Blueprint("phenotypes", __name__) render_template = make_template_renderer("phenotypes") @@ -242,11 +234,6 @@ def view_phenotype(# pylint: disable=[unused-argument] population["Id"], dataset["Id"], xref_id) - def __non_empty__(value) -> bool: - if isinstance(value, str): - return value.strip() != "" - return bool(value) - return render_template( "phenotypes/view-phenotype.html", species=species, @@ -255,19 +242,14 @@ def view_phenotype(# pylint: disable=[unused-argument] xref_id=xref_id, phenotype=phenotype, has_se=any(bool(item.get("error")) for item in phenotype["data"]), - publish_data={ - key.replace("_", " "): val - for key,val in - (phenotype_publication_data(conn, phenotype["Id"]) or {}).items() - if (key in ("PubMed_ID", "Authors", "Title", "Journal") - and __non_empty__(val)) - }, - privileges=(privileges - ### For demo! Do not commit this part - + ("group:resource:edit-resource", - "group:resource:delete-resource",) - ### END: For demo! Do not commit this part - ), + publication=(phenotype_publication_data(conn, phenotype["Id"]) or {}), + privileges=privileges, + next=build_next_argument( + uri="species.populations.phenotypes.view_phenotype", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"], + xref_id=xref_id), activelink="view-phenotype") def __fail__(error): @@ -374,10 +356,17 @@ def process_phenotypes_individual_files(error_uri): bundlepath = Path(app.config["UPLOAD_FOLDER"], f"{str(uuid.uuid4()).replace('-', '')}.zip") with ZipFile(bundlepath,mode="w") as zfile: - for rqtlkey, formkey in (("phenocovar", "phenotype-descriptions"), - ("pheno", "phenotype-data"), - ("phenose", "phenotype-se"), - ("phenonum", "phenotype-n")): + for rqtlkey, formkey, _type in ( + ("phenocovar", "phenotype-descriptions", "mandatory"), + ("pheno", "phenotype-data", "mandatory"), + ("phenose", "phenotype-se", "optional"), + ("phenonum", "phenotype-n", "optional")): + if _type == "optional" and not bool(form.get(formkey)): + continue # skip if an optional key does not exist. + + cdata[f"{rqtlkey}_transposed"] = ( + (form.get(f"{formkey}-transposed") or "off") == "on") + if form.get("resumable-upload", False): # Chunked upload of large files was used filedata = json.loads(form[formkey]) @@ -386,7 +375,7 @@ def process_phenotypes_individual_files(error_uri): arcname=filedata["original-name"]) cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]] else: - # TODO: Check this path: fix any bugs. + # T0DO: Check this path: fix any bugs. _sentfile = request.files[formkey] if not bool(_sentfile): flash(f"Expected file ('{formkey}') was not provided.", @@ -400,6 +389,7 @@ def process_phenotypes_individual_files(error_uri): arcname=filepath.name) cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name] + zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2)) return bundlepath @@ -424,10 +414,7 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p dataset_id=dataset["Id"])) _redisuri = app.config["REDIS_URL"] _sqluri = app.config["SQL_URI"] - with (Redis.from_url(_redisuri, decode_responses=True) as rconn, - # database_connection(_sqluri) as conn, - # conn.cursor(cursorclass=DictCursor) as cursor - ): + with Redis.from_url(_redisuri, decode_responses=True) as rconn: if request.method == "GET": today = datetime.date.today() return render_template( @@ -462,7 +449,6 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p [sys.executable, "-m", "scripts.rqtl2.phenotypes_qc", _sqluri, _redisuri, _namespace, str(_jobid), str(species["SpeciesId"]), str(population["Id"]), - # str(dataset["Id"]), str(phenobundle), "--loglevel", logging.getLevelName( @@ -640,12 +626,16 @@ def load_data_to_database( **kwargs ):# pylint: disable=[unused-argument] """Load the data from the given QC job into the database.""" - jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, - sqlite3.connection(jobs_db) as conn): + sqlite3.connection(_jobs_db) as conn): + # T0DO: Maybe break the connection between the jobs here, pass: + # - the bundle name (rebuild the full path here.) + # - publication details, where separate + # - details about the files: e.g. total lines, etc qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"]) _meta = json.loads(qc_job["job-metadata"]) - load_job_id = uuid.uuid4() + _load_job_id = uuid.uuid4() _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() command = [ sys.executable, @@ -653,8 +643,8 @@ def load_data_to_database( "-m", "scripts.load_phenotypes_to_db", app.config["SQL_URI"], - jobs_db, - str(load_job_id), + _jobs_db, + str(_load_job_id), "--log-level", _loglevel ] @@ -667,41 +657,14 @@ def load_data_to_database( return redirect(url_for( "background-jobs.job_status", job_id=load_job["job_id"])) - issued = datetime.datetime.now() - jwtkey = jwks.newest_jwk_with_rotation( - jwks.jwks_directory(app, "UPLOADER_SECRETS"), - int(app.config["JWKS_ROTATION_AGE_DAYS"])) - return mrequests.post( - urljoin(oauth2client.authserver_uri(), "auth/token"), - json={ - "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer", - "scope": oauth2client.SCOPE, - "assertion": jwt.encode( - header={ - "alg": "RS256", - "typ": "JWT", - "kid": jwtkey.as_dict()["kid"] - }, - payload={ - "iss": str(oauth2client.oauth2_clientid()), - "sub": str(session.user_details()["user_id"]), - "aud": urljoin(oauth2client.authserver_uri(), - "auth/token"), - # TODO: Update expiry time once fix is implemented in - # auth server. - "exp": (issued + timedelta(minutes=5)).timestamp(), - "nbf": int(issued.timestamp()), - "iat": int(issued.timestamp()), - "jti": str(uuid.uuid4()) - }, - key=jwtkey).decode("utf8"), - "client_id": oauth2client.oauth2_clientid() - } + return request_token( + token_uri=urljoin(oauth2client.authserver_uri(), "auth/token"), + user_id=session.user_details()["user_id"] ).then( lambda token: gnlibs_jobs.initialise_job( conn, - load_job_id, + _load_job_id, command, "load-new-phenotypes-data", extra_meta={ @@ -719,8 +682,8 @@ def load_data_to_database( ).then( lambda job: gnlibs_jobs.launch_job( job, - jobs_db, - f"{app.config['UPLOAD_FOLDER']}/job_errors", + _jobs_db, + Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), worker_manager="gn_libs.jobs.launcher", loglevel=_loglevel) ).either(__handle_error__, __handle_success__) @@ -878,12 +841,7 @@ def edit_phenotype_data(# pylint: disable=[unused-argument] def __render__(**kwargs): processed_kwargs = { **kwargs, - "privileges": (kwargs.get("privileges", tuple()) - ### For demo! Do not commit this part - + ("group:resource:edit-resource", - "group:resource:delete-resource",) - ### END: For demo! Do not commit this part - ) + "privileges": kwargs.get("privileges", tuple()) } return render_template( "phenotypes/edit-phenotype.html", @@ -983,181 +941,211 @@ def edit_phenotype_data(# pylint: disable=[unused-argument] xref_id=xref_id)) -def process_phenotype_data_for_download(pheno: dict) -> dict: - """Sanitise data for download.""" - return { - "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}", - **{ - key: val for key, val in pheno.items() - if key not in ("Id", "xref_id", "data", "Units") - }, - **{ - data_item["StrainName"]: data_item["value"] - for data_item in pheno.get("data", {}).values() - } - } - - -BULK_EDIT_COMMON_FIELDNAMES = [ - "UniqueIdentifier", - "Post_publication_description", - "Pre_publication_abbreviation", - "Pre_publication_description", - "Original_description", - "Post_publication_abbreviation", - "PubMed_ID" -] - - @phenotypesbp.route( "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" - "/<int:dataset_id>/edit-download", - methods=["POST"]) + "/<int:dataset_id>/load-data-success/<uuid:job_id>", + methods=["GET"]) @require_login @with_dataset( species_redirect_uri="species.populations.phenotypes.index", population_redirect_uri="species.populations.phenotypes.select_population", redirect_uri="species.populations.phenotypes.list_datasets") -def edit_download_phenotype_data(# pylint: disable=[unused-argument] +def load_data_success( species: dict, population: dict, dataset: dict, + job_id: uuid.UUID, **kwargs -): - formdata = request.json - with database_connection(app.config["SQL_URI"]) as conn: - samples_list = [ - sample["Name"] for sample in samples_by_species_and_population( - conn, species["SpeciesId"], population["Id"])] - data = ( - process_phenotype_data_for_download(pheno) - for pheno in phenotypes_data_by_ids(conn, tuple({ - "population_id": population["Id"], - "phenoid": row["phenotype_id"], - "xref_id": row["xref_id"] - } for row in formdata))) - - with (tempfile.TemporaryDirectory( - prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir): - filename = Path(tmpdir).joinpath("tempfile.tsv") - with open(filename, mode="w") as outfile: - outfile.write( - "# **DO NOT** delete the 'UniqueIdentifier' row. It is used " - "by the system to identify and edit the correct rows and " - "columns in the database.\n") - outfile.write( - "# The '…_description' fields are useful for you to figure out " - "what row you are working on. Changing any of this fields will " - "also update the database, so do be careful.\n") - outfile.write( - "# Leave a field empty to delete the value in the database.\n") - outfile.write( - "# Any line beginning with a '#' character is considered a " - "comment line. This line, and all the lines above it, are " - "all comment lines. Comment lines will be ignored.\n") - writer = csv.DictWriter(outfile, - fieldnames= ( - BULK_EDIT_COMMON_FIELDNAMES + - samples_list), - dialect="excel-tab") - writer.writeheader() - writer.writerows(data) - outfile.flush() - - return send_file( - filename, - mimetype="text/csv", - as_attachment=True, - download_name=secure_filename(f"{dataset['Name']}_data")) +):# pylint: disable=[unused-argument] + """Display success page if loading data to database was successful.""" + with (database_connection(app.config["SQL_URI"]) as conn, + sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) + as jobsconn): + try: + gn2_uri = urlparse(app.config["GN2_SERVER_URL"]) + job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True) + app.logger.debug("THE JOB: %s", job) + _xref_ids = tuple( + str(item) for item + in json.loads(job["metadata"].get("xref_ids", "[]"))) + _publication = fetch_publication_by_id( + conn, int(job["metadata"].get("publication_id", "0"))) + _search_terms = (item for item in + (str(_publication["PubMed_ID"] or ""), + _publication["Authors"], + (_publication["Title"] or "")) + if item != "") + return render_template("phenotypes/load-phenotypes-success.html", + species=species, + population=population, + dataset=dataset, + job=job, + search_page_uri=urlunparse(ParseResult( + scheme=gn2_uri.scheme, + netloc=gn2_uri.netloc, + path="/search", + params="", + query=urlencode({ + "species": species["Name"], + "group": population["Name"], + "type": "Phenotypes", + "dataset": dataset["Name"], + "search_terms_or": ( + # Very long URLs will cause + # errors. + " ".join(_xref_ids) + if len(_xref_ids) <= 100 + else ""), + "search_terms_and": " ".join( + _search_terms).strip(), + "accession_id": "None", + "FormID": "searchResult" + }), + fragment=""))) + except JobNotFound as _jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) @phenotypesbp.route( "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" - "/<int:dataset_id>/edit-upload", - methods=["GET", "POST"]) + "/<int:dataset_id>/recompute-means", + methods=["POST"]) @require_login @with_dataset( species_redirect_uri="species.populations.phenotypes.index", population_redirect_uri="species.populations.phenotypes.select_population", redirect_uri="species.populations.phenotypes.list_datasets") -def edit_upload_phenotype_data(# pylint: disable=[unused-argument] +def recompute_means(# pylint: disable=[unused-argument] species: dict, population: dict, dataset: dict, **kwargs ): - if request.method == "GET": - return render_template( - "phenotypes/bulk-edit-upload.html", - species=species, - population=population, - dataset=dataset, - activelink="edit-phenotype") - - edit_file = save_file(request.files["file-upload-bulk-edit-upload"], - Path(app.config["UPLOAD_FOLDER"])) - - jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] - with sqlite3.connection(jobs_db) as conn: - job_id = uuid.uuid4() - job_cmd = [ - sys.executable, "-u", - "-m", "scripts.phenotypes_bulk_edit", - app.config["SQL_URI"], - jobs_db, - str(job_id), - "--log-level", - logging.getLevelName( - app.logger.getEffectiveLevel() - ).lower() - ] - app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd) + """Compute/Recompute the means for phenotypes in a particular population.""" + _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + _job_id = uuid.uuid4() + _xref_ids = tuple(int(item.split("_")[-1]) + for item in request.form.getlist("selected-phenotypes")) + + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + command = [ + sys.executable, + "-u", + "-m", + "scripts.compute_phenotype_means", + app.config["SQL_URI"], + _jobs_db, + str(population["Id"]), + "--log-level", + _loglevel] + ( + ["--cross-ref-ids", ",".join(str(_id) for _id in _xref_ids)] + if len(_xref_ids) > 0 else + []) + logger.debug("%s.recompute_means: command (%s)", __name__, command) + + with sqlite3.connection(_jobs_db) as conn: _job = gnlibs_jobs.launch_job( - gnlibs_jobs.initialise_job(conn, - job_id, - job_cmd, - "phenotype-bulk-edit", - extra_meta = { - "edit-file": str(edit_file), - "species-id": species["SpeciesId"], - "population-id": population["Id"], - "dataset-id": dataset["Id"] - }), - jobs_db, - f"{app.config['UPLOAD_FOLDER']}/job_errors", - worker_manager="gn_libs.jobs.launcher") - + gnlibs_jobs.initialise_job( + conn, + _job_id, + command, + "(re)compute-phenotype-means", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "success_handler": ( + "uploader.phenotypes.views." + "recompute_phenotype_means_success_handler") + }), + _jobs_db, + Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + return redirect(url_for("background-jobs.job_status", + job_id=_job["job_id"])) + + +def return_to_dataset_view_handler(job, msg: str): + flash(msg, "alert alert-success") + return redirect(url_for( + "species.populations.phenotypes.view_dataset", + species_id=job["metadata"]["species_id"], + population_id=job["metadata"]["population_id"], + dataset_id=job["metadata"]["dataset_id"], + job_id=job["job_id"])) - return redirect(url_for("background-jobs.job_status", - job_id=job_id, - job_type="phenotype-bulk-edit")) +def recompute_phenotype_means_success_handler(job): + """Handle loading new phenotypes into the database successfully.""" + return return_to_dataset_view_handler(job, "Means computed successfully!") @phenotypesbp.route( "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" - "/<int:dataset_id>/load-data-success/<uuid:job_id>", - methods=["GET"]) + "/<int:dataset_id>/rerun-qtlreaper", + methods=["POST"]) @require_login @with_dataset( species_redirect_uri="species.populations.phenotypes.index", population_redirect_uri="species.populations.phenotypes.select_population", redirect_uri="species.populations.phenotypes.list_datasets") -def load_data_success( +def rerun_qtlreaper(# pylint: disable=[unused-argument] species: dict, population: dict, dataset: dict, - job_id: uuid.UUID, **kwargs -):# pylint: disable=[unused-argument] - with sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) as conn: - try: - job = gnlibs_jobs.job(conn, job_id, fulldetails=True) - app.logger.debug("THE JOB: %s", job) - return render_template("phenotypes/load-phenotypes-success.html", - species=species, - population=population, - dataset=dataset, - job=job, - gn2_server_url=app.config["GN2_SERVER_URL"]) - except JobNotFound as jnf: - return render_template("jobs/job-not-found.html", job_id=job_id) +): + """(Re)run QTLReaper for phenotypes in a particular population.""" + _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + _job_id = uuid.uuid4() + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + + _workingdir = Path(app.config["TEMPORARY_DIRECTORY"]).joinpath("qtlreaper") + _workingdir.mkdir(exist_ok=True) + command = [ + sys.executable, + "-u", + "-m", + "scripts.run_qtlreaper", + "--log-level", _loglevel, + app.config["SQL_URI"], + str(species["SpeciesId"]), + str(population["Id"]), + str(Path(app.config["GENOTYPE_FILES_DIRECTORY"]).joinpath( + "genotype")), + str(_workingdir) + ] + [ + str(_xref_id) for _xref_id in ( + int(item.split("_")[-1]) + for item in request.form.getlist("selected-phenotypes")) + ] + logger.debug("(Re)run QTLReaper: %s", command) + with sqlite3.connection(_jobs_db) as conn: + _job_id = uuid.uuid4() + _job = gnlibs_jobs.launch_job( + gnlibs_jobs.initialise_job( + conn, + _job_id, + command, + "(re)run-qtlreaper", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "success_handler": ( + "uploader.phenotypes.views." + "rerun_qtlreaper_success_handler") + }), + _jobs_db, + Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + return redirect(url_for("background-jobs.job_status", + job_id=_job["job_id"])) + return redirect(url_for( + "background-jobs.job_status", job_id=_job["job_id"])) + + +def rerun_qtlreaper_success_handler(job): + """Handle success (re)running QTLReaper script.""" + return return_to_dataset_view_handler(job, "QTLReaper ran successfully!") |
