Diffstat (limited to 'uploader/phenotypes/views.py')
 uploader/phenotypes/views.py | 357
 1 file changed, 204 insertions(+), 153 deletions(-)
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index 92a7c4b..834a450 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -4,20 +4,30 @@
 import csv
 import uuid
 import json
 import logging
-import datetime
 import tempfile
 from typing import Any
 from pathlib import Path
 from zipfile import ZipFile
 from functools import wraps, reduce
 from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING
+from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode
+
+import datetime
+from datetime import timedelta
 from redis import Redis
 from pymonad.either import Left
 from requests.models import Response
 from MySQLdb.cursors import DictCursor
 from werkzeug.utils import secure_filename
+
+from gn_libs import sqlite3
+from gn_libs import jobs as gnlibs_jobs
+from gn_libs.jobs.jobs import JobNotFound
 from gn_libs.mysqldb import database_connection
+from gn_libs import monadic_requests as mrequests
+
+from authlib.jose import jwt
 from flask import (flash,
                    request,
                    url_for,
@@ -31,15 +41,19 @@ from flask import (flash,
 from r_qtl import r_qtl2_qc as rqc
 from r_qtl import exceptions as rqe
 
+
 from uploader import jobs
+from uploader import session
 from uploader.files import save_file#, fullpath
 from uploader.ui import make_template_renderer
 from uploader.oauth2.client import oauth2_post
 from uploader.authorisation import require_login
+from uploader.oauth2 import jwks, client as oauth2client
 from uploader.route_utils import generic_select_population
 from uploader.datautils import safe_int, enumerate_sequence
 from uploader.species.models import all_species, species_by_id
 from uploader.monadic_requests import make_either_error_handler
+from uploader.publications.models import fetch_publication_by_id
 from uploader.request_checks import with_species, with_population
 from uploader.samples.models import samples_by_species_and_population
 from uploader.input_validation import (encode_errors,
@@ -360,10 +374,18 @@ def process_phenotypes_individual_files(error_uri):
     bundlepath = Path(app.config["UPLOAD_FOLDER"],
                       f"{str(uuid.uuid4()).replace('-', '')}.zip")
     with ZipFile(bundlepath,mode="w") as zfile:
-        for rqtlkey, formkey in (("phenocovar", "phenotype-descriptions"),
-                                 ("pheno", "phenotype-data"),
-                                 ("phenose", "phenotype-se"),
-                                 ("phenonum", "phenotype-n")):
+        for rqtlkey, formkey, _type in (
+                ("phenocovar", "phenotype-descriptions", "mandatory"),
+                ("pheno", "phenotype-data", "mandatory"),
+                ("phenose", "phenotype-se", "optional"),
+                ("phenonum", "phenotype-n", "optional")):
+            if _type == "optional" and not bool(form.get(formkey)):
+                # skip if an optional key does not exist.
+                continue
+
+            cdata[f"{rqtlkey}_transposed"] = (
+                (form.get(f"{formkey}-transposed") or "off") == "on")
+
             if form.get("resumable-upload", False):
                 # Chunked upload of large files was used
                 filedata = json.loads(form[formkey])
@@ -386,6 +408,7 @@ def process_phenotypes_individual_files(error_uri):
                         arcname=filepath.name)
             cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name]
 
+        zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2))
     return bundlepath
 
 
@@ -451,21 +474,18 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p
             str(dataset["Id"]),
             str(phenobundle),
             "--loglevel",
-            {
-                INFO: "INFO",
-                ERROR: "ERROR",
-                DEBUG: "DEBUG",
-                FATAL: "FATAL",
-                CRITICAL: "CRITICAL",
-                WARNING: "WARNING"
-            }[app.logger.getEffectiveLevel()],
+            logging.getLevelName(
+                app.logger.getEffectiveLevel()
+            ).lower(),
             "--redisexpiry",
             str(_ttl_seconds)],
         "phenotype_qc", _ttl_seconds,
         {"job-metadata": json.dumps({
             "speciesid": species["SpeciesId"],
             "populationid": population["Id"],
             "datasetid": dataset["Id"],
-            "bundle": str(phenobundle.absolute())})}),
+            "bundle": str(phenobundle.absolute()),
+            **({"publicationid": request.form["publication-id"]}
+               if request.form.get("publication-id") else {})})}),
         _redisuri,
         f"{app.config['UPLOAD_FOLDER']}/job_errors")
@@ -538,7 +558,8 @@ def review_job_data(
         **kwargs
 ):# pylint: disable=[unused-argument]
     """Review data one more time before entering it into the database."""
-    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          database_connection(app.config["SQL_URI"]) as conn):
         try:
             job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
         except jobs.JobNotFound as _jnf:
@@ -586,6 +607,7 @@ def review_job_data(
         filetype: __summarise__(filetype, meta)
         for filetype,meta in metadata.items()
     }
+    _job_metadata = json.loads(job["job-metadata"])
     return render_template("phenotypes/review-job-data.html",
                            species=species,
                            population=population,
@@ -593,9 +615,126 @@ def review_job_data(
                            job_id=job_id,
                            job=job,
                            summary=summary,
+                           publication=(
+                               fetch_publication_by_id(
+                                   conn, int(_job_metadata["publicationid"]))
+                               if _job_metadata.get("publicationid")
+                               else None),
                            activelink="add-phenotypes")
 
 
+def load_phenotypes_success_handler(job):
+    """Handle loading new phenotypes into the database successfully."""
+    return redirect(url_for(
+        "species.populations.phenotypes.load_data_success",
+        species_id=job["metadata"]["species_id"],
+        population_id=job["metadata"]["population_id"],
+        dataset_id=job["metadata"]["dataset_id"],
+        job_id=job["job_id"]))
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/load-data-to-database",
+    methods=["POST"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def load_data_to_database(
+        species: dict,
+        population: dict,
+        dataset: dict,
+        **kwargs
+):# pylint: disable=[unused-argument]
+    """Load the data from the given QC job into the database."""
+    jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          sqlite3.connection(jobs_db) as conn):
+        qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"])
+        _meta = json.loads(qc_job["job-metadata"])
+
+        load_job_id = uuid.uuid4()
+        _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()
+        command = [
+            sys.executable,
+            "-u",
+            "-m",
+            "scripts.load_phenotypes_to_db",
+            app.config["SQL_URI"],
+            jobs_db,
+            str(load_job_id),
+            "--log-level",
+            _loglevel
+        ]
+
+        def __handle_error__(resp):
+            return render_template("http-error.html", *resp.json())
+
+        def __handle_success__(load_job):
+            app.logger.debug("The phenotypes loading job: %s", load_job)
+            return redirect(url_for(
+                "background-jobs.job_status", job_id=load_job["job_id"]))
+
+        issued = datetime.datetime.now()
+        jwtkey = jwks.newest_jwk_with_rotation(
+            jwks.jwks_directory(app, "UPLOADER_SECRETS"),
+            int(app.config["JWKS_ROTATION_AGE_DAYS"]))
+
+        return mrequests.post(
+            urljoin(oauth2client.authserver_uri(), "auth/token"),
+            json={
+                "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
+                "scope": oauth2client.SCOPE,
+                "assertion": jwt.encode(
+                    header={
+                        "alg": "RS256",
+                        "typ": "JWT",
+                        "kid": jwtkey.as_dict()["kid"]
+                    },
+                    payload={
+                        "iss": str(oauth2client.oauth2_clientid()),
+                        "sub": str(session.user_details()["user_id"]),
+                        "aud": urljoin(oauth2client.authserver_uri(),
+                                       "auth/token"),
+                        # TODO: Update expiry time once fix is implemented in
+                        #       auth server.
+                        "exp": (issued + timedelta(minutes=5)).timestamp(),
+                        "nbf": int(issued.timestamp()),
+                        "iat": int(issued.timestamp()),
+                        "jti": str(uuid.uuid4())
+                    },
+                    key=jwtkey).decode("utf8"),
+                "client_id": oauth2client.oauth2_clientid()
+            }
+        ).then(
+            lambda token: gnlibs_jobs.initialise_job(
+                conn,
+                load_job_id,
+                command,
+                "load-new-phenotypes-data",
+                extra_meta={
+                    "species_id": species["SpeciesId"],
+                    "population_id": population["Id"],
+                    "dataset_id": dataset["Id"],
+                    "bundle_file": _meta["bundle"],
+                    "publication_id": _meta["publicationid"],
+                    "authserver": oauth2client.authserver_uri(),
+                    "token": token["access_token"],
+                    "success_handler": (
+                        "uploader.phenotypes.views"
+                        ".load_phenotypes_success_handler")
+                })
+        ).then(
+            lambda job: gnlibs_jobs.launch_job(
+                job,
+                jobs_db,
+                Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"),
+                worker_manager="gn_libs.jobs.launcher",
+                loglevel=_loglevel)
+        ).either(__handle_error__, __handle_success__)
+
+
 def update_phenotype_metadata(conn, metadata: dict):
     """Update a phenotype's basic metadata values."""
     with conn.cursor(cursorclass=DictCursor) as cursor:
@@ -853,153 +992,65 @@ def edit_phenotype_data(# pylint: disable=[unused-argument]
         xref_id=xref_id))
 
 
-def process_phenotype_data_for_download(pheno: dict) -> dict:
-    """Sanitise data for download."""
-    return {
-        "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}",
-        **{
-            key: val for key, val in pheno.items()
-            if key not in ("Id", "xref_id", "data", "Units")
-        },
-        **{
-            data_item["StrainName"]: data_item["value"]
-            for data_item in pheno.get("data", {}).values()
-        }
-    }
-
-
-BULK_EDIT_COMMON_FIELDNAMES = [
-    "UniqueIdentifier",
-    "Post_publication_description",
-    "Pre_publication_abbreviation",
-    "Pre_publication_description",
-    "Original_description",
-    "Post_publication_abbreviation",
-    "PubMed_ID"
-]
-
-
 @phenotypesbp.route(
     "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
-    "/<int:dataset_id>/edit-download",
-    methods=["POST"])
-@require_login
-@with_dataset(
-    species_redirect_uri="species.populations.phenotypes.index",
-    population_redirect_uri="species.populations.phenotypes.select_population",
-    redirect_uri="species.populations.phenotypes.list_datasets")
-def edit_download_phenotype_data(# pylint: disable=[unused-argument]
-        species: dict,
-        population: dict,
-        dataset: dict,
-        **kwargs
-):
-    formdata = request.json
-    with database_connection(app.config["SQL_URI"]) as conn:
-        samples_list = [
-            sample["Name"] for sample in samples_by_species_and_population(
-                conn, species["SpeciesId"], population["Id"])]
-        data = (
-            process_phenotype_data_for_download(pheno)
-            for pheno in phenotypes_data_by_ids(conn, tuple({
-                "population_id": population["Id"],
-                "phenoid": row["phenotype_id"],
-                "xref_id": row["xref_id"]
-            } for row in formdata)))
-
-        with (tempfile.TemporaryDirectory(
-                prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir):
-            filename = Path(tmpdir).joinpath("tempfile.tsv")
-            with open(filename, mode="w") as outfile:
-                outfile.write(
-                    "# **DO NOT** delete the 'UniqueIdentifier' row. It is used "
-                    "by the system to identify and edit the correct rows and "
-                    "columns in the database.\n")
-                outfile.write(
-                    "# The '…_description' fields are useful for you to figure out "
-                    "what row you are working on. Changing any of this fields will "
-                    "also update the database, so do be careful.\n")
-                outfile.write(
-                    "# Leave a field empty to delete the value in the database.\n")
-                outfile.write(
-                    "# Any line beginning with a '#' character is considered a "
-                    "comment line. This line, and all the lines above it, are "
-                    "all comment lines. Comment lines will be ignored.\n")
-                writer = csv.DictWriter(outfile,
-                                        fieldnames= (
-                                            BULK_EDIT_COMMON_FIELDNAMES +
-                                            samples_list),
-                                        dialect="excel-tab")
-                writer.writeheader()
-                writer.writerows(data)
-                outfile.flush()
-
-            return send_file(
-                filename,
-                mimetype="text/csv",
-                as_attachment=True,
-                download_name=secure_filename(f"{dataset['Name']}_data"))
-
-
-@phenotypesbp.route(
-    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
-    "/<int:dataset_id>/edit-upload",
-    methods=["GET", "POST"])
+    "/<int:dataset_id>/load-data-success/<uuid:job_id>",
+    methods=["GET"])
 @require_login
 @with_dataset(
     species_redirect_uri="species.populations.phenotypes.index",
    population_redirect_uri="species.populations.phenotypes.select_population",
     redirect_uri="species.populations.phenotypes.list_datasets")
-def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
+def load_data_success(
         species: dict,
         population: dict,
         dataset: dict,
+        job_id: uuid.UUID,
         **kwargs
-):
-    if request.method == "GET":
-        return render_template(
-            "phenotypes/bulk-edit-upload.html",
-            species=species,
-            population=population,
-            dataset=dataset,
-            activelink="edit-phenotype")
-
-    edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
-                          Path(app.config["UPLOAD_FOLDER"]))
-
-    from gn_libs import jobs as gnlibs_jobs
-    from gn_libs import sqlite3
-    jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
-    with sqlite3.connection(jobs_db) as conn:
-        job_id = uuid.uuid4()
-        job_cmd = [
-            sys.executable, "-u",
-            "-m", "scripts.phenotypes_bulk_edit",
-            app.config["SQL_URI"],
-            jobs_db,
-            str(job_id),
-            "--log-level",
-            logging.getLevelName(
-                app.logger.getEffectiveLevel()
-            ).lower()
-        ]
-        app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
-        _job = gnlibs_jobs.launch_job(
-            gnlibs_jobs.initialise_job(conn,
-                                       job_id,
-                                       job_cmd,
-                                       "phenotype-bulk-edit",
-                                       extra_meta = {
-                                           "edit-file": str(edit_file),
-                                           "species-id": species["SpeciesId"],
-                                           "population-id": population["Id"],
-                                           "dataset-id": dataset["Id"]
+):# pylint: disable=[unused-argument]
+    with (database_connection(app.config["SQL_URI"]) as conn,
+          sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"])
+          as jobsconn):
+        try:
+            gn2_uri = urlparse(app.config["GN2_SERVER_URL"])
+            job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True)
+            app.logger.debug("THE JOB: %s", job)
+            _xref_ids = tuple(
+                str(item) for item
+                in json.loads(job["metadata"].get("xref_ids", "[]")))
+            _publication = fetch_publication_by_id(
+                conn, int(job["metadata"].get("publication_id", "0")))
+            _search_terms = (item for item in
+                             (str(_publication["PubMed_ID"] or ""),
+                              _publication["Authors"],
+                              (_publication["Title"] or ""))
+                             if item != "")
+            return render_template("phenotypes/load-phenotypes-success.html",
+                                   species=species,
+                                   population=population,
+                                   dataset=dataset,
+                                   job=job,
+                                   search_page_uri=urlunparse(ParseResult(
+                                       scheme=gn2_uri.scheme,
+                                       netloc=gn2_uri.netloc,
+                                       path="/search",
+                                       params="",
+                                       query=urlencode({
+                                           "species": species["Name"],
+                                           "group": population["Name"],
+                                           "type": "Phenotypes",
+                                           "dataset": dataset["Name"],
+                                           "search_terms_or": (
+                                               # Very long URLs will cause
+                                               # errors.
+                                               " ".join(_xref_ids)
+                                               if len(_xref_ids) <= 100
+                                               else ""),
+                                           "search_terms_and": " ".join(
+                                               _search_terms).strip(),
+                                           "accession_id": "None",
+                                           "FormID": "searchResult"
                                        }),
-            jobs_db,
-            f"{app.config['UPLOAD_FOLDER']}/job_errors",
-            worker_manager="gn_libs.jobs.launcher")
-
-
-    return redirect(url_for("background-jobs.job_status",
-                            job_id=job_id,
-                            job_type="phenotype-bulk-edit"))
+                                       fragment="")))
        except JobNotFound as jnf:
            return render_template("jobs/job-not-found.html", job_id=job_id)
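
The --loglevel change in add_phenotypes() swaps a hand-written level-to-name dict for the stdlib's own mapping: logging.getLevelName() returns the level's name when given its numeric value, covering every level the removed dict enumerated. A quick illustration:

    import logging

    # Numeric level -> name: 10 -> "DEBUG", 30 -> "WARNING", 50 -> "CRITICAL".
    # logging.FATAL is an alias of logging.CRITICAL (both 50), so the old
    # dict's FATAL entry collapsed into CRITICAL anyway.
    for level in (logging.DEBUG, logging.INFO, logging.WARNING,
                  logging.ERROR, logging.CRITICAL):
        print(level, logging.getLevelName(level).lower())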
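
The new load_data_to_database() authenticates to the auth server with an RFC 7523 JWT-bearer grant (grant_type urn:ietf:params:oauth:grant-type:jwt-bearer): it signs a short-lived assertion with the newest key from its JWKS directory and exchanges it for an access token. A minimal, self-contained sketch of building such an assertion with authlib; the generated key, client id, user id and audience below are placeholders, not the uploader's configuration:

    import uuid
    import datetime
    from datetime import timedelta

    from authlib.jose import jwt, JsonWebKey

    # Placeholder key; the uploader instead loads its newest rotating key
    # via jwks.newest_jwk_with_rotation().
    key = JsonWebKey.generate_key("RSA", 2048, is_private=True)

    issued = datetime.datetime.now()
    assertion = jwt.encode(
        header={"alg": "RS256", "typ": "JWT", "kid": "demo-key"},
        payload={
            "iss": "demo-client-id",                       # placeholder
            "sub": "demo-user-id",                         # placeholder
            "aud": "https://auth.example.org/auth/token",  # placeholder
            "exp": (issued + timedelta(minutes=5)).timestamp(),
            "nbf": int(issued.timestamp()),
            "iat": int(issued.timestamp()),
            "jti": str(uuid.uuid4())
        },
        key=key).decode("utf8")

    # The assertion then goes in the token request body alongside
    # grant_type and client_id, as in the diff above.
    print(assertion.count(".") == 2)  # compact JWS: header.payload.signature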
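
The token request, job initialisation and job launch are chained monadically: mrequests.post() yields an Either-style value, each .then() step runs only on the success path, and .either() finally dispatches to the error or success handler. A toy sketch of that control flow with pymonad, where fetch_token() stands in for the real HTTP call and the returned strings are illustrative:

    from pymonad.either import Left, Right

    def fetch_token(ok: bool):
        # Stand-in for mrequests.post(): Right(...) on success,
        # Left(...) on failure.
        return Right({"access_token": "abc123"}) if ok else Left("auth failed")

    outcome = (
        fetch_token(True)
        .then(lambda token: {"name": "load-job", "token": token["access_token"]})
        .either(lambda err: f"render error page: {err}",
                lambda job: f"redirect to status page for {job['name']}"))
    print(outcome)  # redirect to status page for load-job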
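
load_data_success() assembles the GN2 search link from typed parts rather than by string concatenation, which keeps the query properly escaped, and it omits the xref-id list entirely once it exceeds 100 entries to avoid over-long URLs. A condensed sketch of the same stdlib construction, with illustrative host and query values:

    from urllib.parse import ParseResult, urlencode, urlparse, urlunparse

    gn2_uri = urlparse("https://genenetwork.org")  # stand-in for GN2_SERVER_URL
    search_page_uri = urlunparse(ParseResult(
        scheme=gn2_uri.scheme,
        netloc=gn2_uri.netloc,
        path="/search",
        params="",
        query=urlencode({           # values below are illustrative
            "species": "mouse",
            "type": "Phenotypes",
            "search_terms_or": "10001 10002",
            "FormID": "searchResult"
        }),
        fragment=""))
    print(search_page_uri)
    # https://genenetwork.org/search?species=mouse&type=Phenotypes&...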