diff options
Diffstat (limited to 'uploader/phenotypes')
| -rw-r--r-- | uploader/phenotypes/misc.py | 2 | ||||
| -rw-r--r-- | uploader/phenotypes/models.py | 267 | ||||
| -rw-r--r-- | uploader/phenotypes/views.py | 441 |
3 files changed, 601 insertions, 109 deletions
diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py index cbe3b7f..1924c07 100644 --- a/uploader/phenotypes/misc.py +++ b/uploader/phenotypes/misc.py @@ -8,7 +8,7 @@ def phenotypes_data_differences( filedata: tuple[dict, ...], dbdata: tuple[dict, ...] ) -> tuple[dict, ...]: """Compute differences between file data and db data""" - diff = tuple() + diff: tuple[dict, ...] = tuple() for filerow, dbrow in zip( sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])), sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))): diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index e40155f..3946a0f 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -1,13 +1,15 @@ """Database and utility functions for phenotypes.""" +import time +import random import logging import tempfile from pathlib import Path from functools import reduce from datetime import datetime -from typing import Optional, Iterable +from typing import Union, Optional, Iterable -import MySQLdb as mdb -from MySQLdb.cursors import Cursor, DictCursor +from MySQLdb.connections import Connection +from MySQLdb.cursors import Cursor, DictCursor, BaseCursor from gn_libs.mysqldb import debug_query @@ -27,7 +29,7 @@ __PHENO_DATA_TABLES__ = { def datasets_by_population( - conn: mdb.Connection, + conn: Connection, species_id: int, population_id: int ) -> tuple[dict, ...]: @@ -42,7 +44,7 @@ def datasets_by_population( return tuple(dict(row) for row in cursor.fetchall()) -def dataset_by_id(conn: mdb.Connection, +def dataset_by_id(conn: Connection, species_id: int, population_id: int, dataset_id: int) -> dict: @@ -57,7 +59,7 @@ def dataset_by_id(conn: mdb.Connection, return dict(cursor.fetchone()) -def phenotypes_count(conn: mdb.Connection, +def phenotypes_count(conn: Connection, population_id: int, dataset_id: int) -> int: """Count the number of phenotypes in the dataset.""" @@ -85,11 +87,14 @@ def phenotype_publication_data(conn, phenotype_id) -> Optional[dict]: return dict(res) -def dataset_phenotypes(conn: mdb.Connection, - population_id: int, - dataset_id: int, - offset: int = 0, - limit: Optional[int] = None) -> tuple[dict, ...]: +def dataset_phenotypes(# pylint: disable=[too-many-arguments, too-many-positional-arguments] + conn: Connection, + population_id: int, + dataset_id: int, + offset: int = 0, + limit: Optional[int] = None, + xref_ids: tuple[int, ...] = tuple() +) -> tuple[dict, ...]: """Fetch the actual phenotypes.""" _query = ( "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, ist.InbredSetCode " @@ -98,14 +103,16 @@ def dataset_phenotypes(conn: mdb.Connection, "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId " "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id " "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( + f" AND pxr.Id IN ({', '.join(['%s'] * len(xref_ids))})" + if len(xref_ids) > 0 else "") + ( f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute(_query, (population_id, dataset_id)) + cursor.execute(_query, (population_id, dataset_id) + xref_ids) debug_query(cursor, logger) return tuple(dict(row) for row in cursor.fetchall()) -def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids): +def __phenotype_se__(cursor: BaseCursor, xref_id, dataids_and_strainids): """Fetch standard-error values (if they exist) for a phenotype.""" paramstr = ", ".join(["(%s, %s)"] * len(dataids_and_strainids)) flat = tuple(item for sublist in dataids_and_strainids for item in sublist) @@ -187,7 +194,7 @@ def __merge_pheno_data_and_se__(data, sedata) -> dict: def phenotype_by_id( - conn: mdb.Connection, + conn: Connection, species_id: int, population_id: int, dataset_id: int, @@ -225,7 +232,7 @@ def phenotype_by_id( return None -def phenotypes_data(conn: mdb.Connection, +def phenotypes_data(conn: Connection, population_id: int, dataset_id: int, offset: int = 0, @@ -248,7 +255,60 @@ def phenotypes_data(conn: mdb.Connection, return tuple(dict(row) for row in cursor.fetchall()) -def save_new_dataset(cursor: Cursor, +def phenotypes_vector_data(# pylint: disable=[too-many-arguments, too-many-positional-arguments] + conn: Connection, + species_id: int, + population_id: int, + xref_ids: tuple[int, ...] = tuple(), + offset: int = 0, + limit: Optional[int] = None +) -> dict[tuple[int, int, int], dict[str, Union[int,float]]]: + """Retrieve the vector data values for traits in the database.""" + _params: tuple[int, ...] = (species_id, population_id) + _query = ("SELECT " + "Species.Id AS SpeciesId, iset.Id AS InbredSetId, " + "pxr.Id AS xref_id, pdata.*, Strain.Id AS StrainId, " + "Strain.Name AS StrainName " + "FROM " + "Species INNER JOIN InbredSet AS iset " + "ON Species.Id=iset.SpeciesId " + "INNER JOIN PublishXRef AS pxr " + "ON iset.Id=pxr.InbredSetId " + "INNER JOIN PublishData AS pdata " + "ON pxr.DataId=pdata.Id " + "INNER JOIN Strain " + "ON pdata.StrainId=Strain.Id " + "WHERE Species.Id=%s AND iset.Id=%s") + if len(xref_ids) > 0: + _paramstr = ", ".join(["%s"] * len(xref_ids)) + _query = _query + f" AND pxr.Id IN ({_paramstr})" + _params = _params + xref_ids + + def __organise__(acc, row): + _rowid = (species_id, population_id, row["xref_id"]) + _phenodata = { + **acc.get( + _rowid, { + "species_id": species_id, + "population_id": population_id, + "xref_id": row["xref_id"] + }), + row["StrainName"]: row["value"] + } + return { + **acc, + _rowid: _phenodata + } + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + _query + (f" LIMIT {limit} OFFSET {offset}" if bool(limit) else ""), + _params) + debug_query(cursor, logger) + return reduce(__organise__, cursor.fetchall(), {}) + + +def save_new_dataset(cursor: BaseCursor, population_id: int, dataset_name: str, dataset_fullname: str, @@ -275,35 +335,6 @@ def save_new_dataset(cursor: Cursor, return {**params, "Id": cursor.lastrowid} -def phenotypes_data_by_ids( - conn: mdb.Connection, - inbred_pheno_xref: dict[str, int] -) -> tuple[dict, ...]: - """Fetch all phenotype data, filtered by the `inbred_pheno_xref` mapping.""" - _paramstr = ",".join(["(%s, %s, %s)"] * len(inbred_pheno_xref)) - _query = ("SELECT " - "pub.PubMed_ID, pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " - "FROM Publication AS pub " - "RIGHT JOIN PublishXRef AS pxr0 ON pub.Id=pxr0.PublicationId " - "INNER JOIN Phenotype AS pheno ON pxr0.PhenotypeId=pheno.id " - "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " - "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " - "INNER JOIN Strain AS str ON pd.StrainId=str.Id " - "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " - "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " - "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " - f"WHERE (pxr.InbredSetId, pheno.Id, pxr.Id) IN ({_paramstr}) " - "ORDER BY pheno.Id") - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute(_query, tuple(item for row in inbred_pheno_xref - for item in (row["population_id"], - row["phenoid"], - row["xref_id"]))) - debug_query(cursor, logger) - return tuple( - reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values()) - - def __pre_process_phenotype_data__(row): _desc = row.get("description", "") _pre_pub_desc = row.get("pre_publication_description", _desc) @@ -322,13 +353,13 @@ def __pre_process_phenotype_data__(row): def create_new_phenotypes(# pylint: disable=[too-many-locals] - conn: mdb.Connection, + conn: Connection, population_id: int, publication_id: int, phenotypes: Iterable[dict] ) -> tuple[dict, ...]: """Add entirely new phenotypes to the database. WARNING: Not thread-safe.""" - _phenos = tuple() + _phenos: tuple[dict, ...] = tuple() with conn.cursor(cursorclass=DictCursor) as cursor: def make_next_id(idcol, table): cursor.execute(f"SELECT MAX({idcol}) AS last_id FROM {table}") @@ -377,9 +408,10 @@ def create_new_phenotypes(# pylint: disable=[too-many-locals] if len(batch) == 0: break - params, abbrevs = reduce(__build_params_and_prepubabbrevs__, - batch, - (tuple(), tuple())) + params, abbrevs = reduce(#type: ignore[var-annotated] + __build_params_and_prepubabbrevs__, + batch, + (tuple(), tuple())) # Check for uniqueness for all "Pre_publication_description" values abbrevs_paramsstr = ", ".join(["%s"] * len(abbrevs)) _query = ("SELECT PublishXRef.PhenotypeId, Phenotype.* " @@ -449,7 +481,7 @@ def create_new_phenotypes(# pylint: disable=[too-many-locals] def save_phenotypes_data( - conn: mdb.Connection, + conn: Connection, table: str, data: Iterable[dict] ) -> int: @@ -479,7 +511,7 @@ def save_phenotypes_data( def quick_save_phenotypes_data( - conn: mdb.Connection, + conn: Connection, table: str, dataitems: Iterable[dict], tmpdir: Path @@ -509,3 +541,134 @@ def quick_save_phenotypes_data( ")") debug_query(cursor, logger) return _count + + +def __sleep_random__(): + """Sleep a random amount of time chosen from 0.05s to 1s in increments of 0.05""" + time.sleep(random.choice(tuple(i / 20.0 for i in range(1, 21)))) + + +def delete_phenotypes_data( + cursor: BaseCursor, + data_ids: tuple[int, ...] +) -> tuple[int, int, int]: + """Delete numeric data for phenotypes with the given data IDs.""" + if len(data_ids) == 0: + return (0, 0, 0) + + # Loop to handle big deletes i.e. ≥ 10000 rows + _dcount, _secount, _ncount = (0, 0, 0)# Count total rows deleted + while True: + _paramstr = ", ".join(["%s"] * len(data_ids)) + cursor.execute( + "DELETE FROM PublishData " + f"WHERE Id IN ({_paramstr}) " + "ORDER BY Id ASC, StrainId ASC "# Make deletions deterministic + "LIMIT 1000", + data_ids) + _dcount_curr = cursor.rowcount + _dcount += _dcount_curr + + cursor.execute( + "DELETE FROM PublishSE " + f"WHERE DataId IN ({_paramstr}) " + "ORDER BY DataId ASC, StrainId ASC "# Make deletions deterministic + "LIMIT 1000", + data_ids) + _secount_curr = cursor.rowcount + _secount += _secount_curr + + cursor.execute( + "DELETE FROM NStrain " + f"WHERE DataId IN ({_paramstr}) " + "ORDER BY DataId ASC, StrainId ASC "# Make deletions deterministic + "LIMIT 1000", + data_ids) + _ncount_curr = cursor.rowcount + _ncount += _ncount_curr + __sleep_random__() + + if all((_dcount_curr == 0, _secount_curr == 0, _ncount_curr == 0)): + # end loop if there are no more rows to delete. + break + + return (_dcount, _secount, _ncount) + + +def __linked_ids__( + cursor: BaseCursor, + population_id: int, + xref_ids: tuple[int, ...] +) -> tuple[tuple[int, int, int], ...]: + """Retrieve `DataId` values from `PublishXRef` table.""" + _paramstr = ", ".join(["%s"] * len(xref_ids)) + cursor.execute("SELECT PhenotypeId, PublicationId, DataId " + "FROM PublishXRef " + f"WHERE InbredSetId=%s AND Id IN ({_paramstr})", + (population_id,) + xref_ids) + return tuple( + (int(row["PhenotypeId"]), int(row["PublicationId"]), int(row["DataId"])) + for row in cursor.fetchall()) + + +def delete_phenotypes( + conn_or_cursor: Union[Connection, Cursor], + population_id: int, + xref_ids: tuple[int, ...] +) -> tuple[int, int, int, int]: + """Delete phenotypes and all their data.""" + def __delete_phenos__(cursor: BaseCursor, pheno_ids: tuple[int, ...]) -> int: + """Delete data from the `Phenotype` table.""" + _paramstr = ", ".join(["%s"] * len(pheno_ids)) + + _pcount = 0 + while True: + cursor.execute( + "DELETE FROM Phenotype " + f"WHERE Id IN ({_paramstr}) " + "ORDER BY Id " + "LIMIT 1000", + pheno_ids) + _pcount_curr = cursor.rowcount + _pcount += _pcount_curr + __sleep_random__() + if _pcount_curr == 0: + break + + return cursor.rowcount + + def __delete_xrefs__(cursor: BaseCursor) -> int: + _paramstr = ", ".join(["%s"] * len(xref_ids)) + + _xcount = 0 + while True: + cursor.execute( + "DELETE FROM PublishXRef " + f"WHERE InbredSetId=%s AND Id IN ({_paramstr}) " + "ORDER BY Id " + "LIMIT 10000", + (population_id,) + xref_ids) + _xcount_curr = cursor.rowcount + _xcount += _xcount_curr + __sleep_random__() + if _xcount_curr == 0: + break + + return _xcount + + def __with_cursor__(cursor): + _phenoids, _pubids, _dataids = reduce( + lambda acc, curr: (acc[0] + (curr[0],), + acc[1] + (curr[1],), + acc[2] + (curr[2],)), + __linked_ids__(cursor, population_id, xref_ids), + (tuple(), tuple(), tuple())) + __delete_phenos__(cursor, _phenoids) + return (__delete_xrefs__(cursor),) + delete_phenotypes_data( + cursor, _dataids) + + if isinstance(conn_or_cursor, BaseCursor): + return __with_cursor__(conn_or_cursor) + + with conn_or_cursor.cursor(cursorclass=DictCursor) as cursor: + return __with_cursor__(cursor) diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py index 556b5ff..23bc682 100644 --- a/uploader/phenotypes/views.py +++ b/uploader/phenotypes/views.py @@ -1,4 +1,6 @@ """Views handling ('classical') phenotypes."""# pylint: disable=[too-many-lines] +import io +import csv import sys import uuid import json @@ -21,26 +23,29 @@ from gn_libs import jobs as gnlibs_jobs from gn_libs.jobs.jobs import JobNotFound from gn_libs.mysqldb import database_connection +from werkzeug.datastructures import Headers from flask import (flash, request, - url_for, jsonify, redirect, Blueprint, - current_app as app) + current_app as app, + Response as FlaskResponse) from r_qtl import r_qtl2_qc as rqc from r_qtl import exceptions as rqe - from uploader import jobs from uploader import session -from uploader.files import save_file#, fullpath +from uploader.files import save_file +from uploader.configutils import uploads_dir +from uploader.flask_extensions import url_for from uploader.ui import make_template_renderer from uploader.oauth2.client import oauth2_post from uploader.oauth2.tokens import request_token from uploader.authorisation import require_login from uploader.oauth2 import client as oauth2client +from uploader.route_utils import build_next_argument from uploader.route_utils import generic_select_population from uploader.datautils import safe_int, enumerate_sequence from uploader.species.models import all_species, species_by_id @@ -59,6 +64,7 @@ from .models import (dataset_by_id, datasets_by_population, phenotype_publication_data) +logger = logging.getLogger(__name__) phenotypesbp = Blueprint("phenotypes", __name__) render_template = make_template_renderer("phenotypes") @@ -232,11 +238,6 @@ def view_phenotype(# pylint: disable=[unused-argument] population["Id"], dataset["Id"], xref_id) - def __non_empty__(value) -> bool: - if isinstance(value, str): - return value.strip() != "" - return bool(value) - return render_template( "phenotypes/view-phenotype.html", species=species, @@ -245,14 +246,14 @@ def view_phenotype(# pylint: disable=[unused-argument] xref_id=xref_id, phenotype=phenotype, has_se=any(bool(item.get("error")) for item in phenotype["data"]), - publish_data={ - key.replace("_", " "): val - for key,val in - (phenotype_publication_data(conn, phenotype["Id"]) or {}).items() - if (key in ("PubMed_ID", "Authors", "Title", "Journal") - and __non_empty__(val)) - }, + publication=(phenotype_publication_data(conn, phenotype["Id"]) or {}), privileges=privileges, + next=build_next_argument( + uri="species.populations.phenotypes.view_phenotype", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"], + xref_id=xref_id), activelink="view-phenotype") def __fail__(error): @@ -333,7 +334,7 @@ def process_phenotypes_rqtl2_bundle(error_uri): try: ## Handle huge files here... phenobundle = save_file(request.files["phenotypes-bundle"], - Path(app.config["UPLOAD_FOLDER"])) + uploads_dir(app)) rqc.validate_bundle(phenobundle) return phenobundle except AssertionError as _aerr: @@ -356,7 +357,7 @@ def process_phenotypes_individual_files(error_uri): "comment.char": form["file-comment-character"], "na.strings": form["file-na"].split(" "), } - bundlepath = Path(app.config["UPLOAD_FOLDER"], + bundlepath = Path(uploads_dir(app), f"{str(uuid.uuid4()).replace('-', '')}.zip") with ZipFile(bundlepath,mode="w") as zfile: for rqtlkey, formkey, _type in ( @@ -374,7 +375,7 @@ def process_phenotypes_individual_files(error_uri): # Chunked upload of large files was used filedata = json.loads(form[formkey]) zfile.write( - Path(app.config["UPLOAD_FOLDER"], filedata["uploaded-file"]), + Path(uploads_dir(app), filedata["uploaded-file"]), arcname=filedata["original-name"]) cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]] else: @@ -386,9 +387,9 @@ def process_phenotypes_individual_files(error_uri): return error_uri filepath = save_file( - _sentfile, Path(app.config["UPLOAD_FOLDER"]), hashed=False) + _sentfile, uploads_dir(app), hashed=False) zfile.write( - Path(app.config["UPLOAD_FOLDER"], filepath), + Path(uploads_dir(app), filepath), arcname=filepath.name) cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name] @@ -422,7 +423,8 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p today = datetime.date.today() return render_template( ("phenotypes/add-phenotypes-with-rqtl2-bundle.html" - if use_bundle else "phenotypes/add-phenotypes-raw-files.html"), + if use_bundle + else "phenotypes/add-phenotypes-raw-files.html"), species=species, population=population, dataset=dataset, @@ -467,7 +469,7 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p **({"publicationid": request.form["publication-id"]} if request.form.get("publication-id") else {})})}), _redisuri, - f"{app.config['UPLOAD_FOLDER']}/job_errors") + f"{uploads_dir(app)}/job_errors") app.logger.debug("JOB DETAILS: %s", _job) jobstatusuri = url_for("species.populations.phenotypes.job_status", @@ -508,6 +510,7 @@ def job_status( job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id)) except jobs.JobNotFound as _jnf: job = None + return render_template("phenotypes/job-status.html", species=species, population=population, @@ -523,6 +526,65 @@ def job_status( @phenotypesbp.route( "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/job/<uuid:job_id>/download-errors", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def download_errors( + species: dict, + population: dict, + dataset: dict, + job_id: uuid.UUID, + **kwargs):# pylint: disable=[unused-argument] + """Download the list of errors as a CSV file.""" + with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + try: + job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id)) + _prefix_ = jobs.jobsnamespace() + _jobid_ = job['jobid'] + def __generate_chunks__(): + _errors_ = ( + json.loads(error) + for key in rconn.keys( + f"{_prefix_}:{str(_jobid_)}:*:errors:*") + for error in rconn.lrange(key, 0, -1)) + _chunk_no_ = 0 + _all_errors_printed_ = False + while not _all_errors_printed_: + _chunk_ = [] + try: + for _ in range(0, 1000): + _chunk_.append(next(_errors_)) + except StopIteration: + _all_errors_printed_ = True + if len(_chunk_) <= 0: + raise + + _out_ = io.StringIO() + _writer_ = csv.DictWriter(_out_, fieldnames=tuple(_chunk_[0].keys())) + if _chunk_no_ == 0: + _writer_.writeheader() + _writer_.writerows(_chunk_) + _chunk_no_ += 1 + yield _out_.getvalue() + if _all_errors_printed_: + return + + headers = Headers() + headers.set("Content-Disposition", + "attachment", + filename=f"{job['job-type']}_{job['jobid']}.csv") + return FlaskResponse( + __generate_chunks__(), mimetype="text/csv", headers=headers) + except jobs.JobNotFound as _jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" "/<int:dataset_id>/job/<uuid:job_id>/review", methods=["GET"]) @require_login @@ -613,6 +675,12 @@ def load_phenotypes_success_handler(job): job_id=job["job_id"])) +def proceed_to_job_status(job): + """A generic 'job success' handler for asynchronous phenotype jobs.""" + app.logger.debug("The new job: %s", job) + return redirect(url_for("background-jobs.job_status", job_id=job["job_id"])) + + @phenotypesbp.route( "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" "/<int:dataset_id>/load-data-to-database", @@ -655,11 +723,6 @@ def load_data_to_database( def __handle_error__(resp): return render_template("http-error.html", *resp.json()) - def __handle_success__(load_job): - app.logger.debug("The phenotypes loading job: %s", load_job) - return redirect(url_for( - "background-jobs.job_status", job_id=load_job["job_id"])) - return request_token( token_uri=urljoin(oauth2client.authserver_uri(), "auth/token"), @@ -681,15 +744,16 @@ def load_data_to_database( "success_handler": ( "uploader.phenotypes.views" ".load_phenotypes_success_handler") - }) + }, + external_id=session.logged_in_user_id()) ).then( lambda job: gnlibs_jobs.launch_job( job, _jobs_db, - Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), + Path(f"{uploads_dir(app)}/job_errors"), worker_manager="gn_libs.jobs.launcher", loglevel=_loglevel) - ).either(__handle_error__, __handle_success__) + ).either(__handle_error__, proceed_to_job_status) def update_phenotype_metadata(conn, metadata: dict): @@ -804,7 +868,7 @@ def update_phenotype_data(conn, data: dict): } }) - values, serrs, counts = tuple( + values, serrs, counts = tuple(# type: ignore[var-annotated] tuple({ "data_id": row[0].split("::")[0], "strain_id": row[0].split("::")[1], @@ -978,32 +1042,297 @@ def load_data_success( _publication["Authors"], (_publication["Title"] or "")) if item != "") - return render_template("phenotypes/load-phenotypes-success.html", - species=species, - population=population, - dataset=dataset, - job=job, - search_page_uri=urlunparse(ParseResult( - scheme=gn2_uri.scheme, - netloc=gn2_uri.netloc, - path="/search", - params="", - query=urlencode({ - "species": species["Name"], - "group": population["Name"], - "type": "Phenotypes", - "dataset": dataset["Name"], - "search_terms_or": ( - # Very long URLs will cause - # errors. + return render_template( + "phenotypes/load-phenotypes-success.html", + species=species, + population=population, + dataset=dataset, + job=job, + search_page_uri=urlunparse(ParseResult( + scheme=gn2_uri.scheme, + netloc=gn2_uri.netloc, + path="/search", + params="", + query=urlencode({ + "species": species["Name"], + "group": population["Name"], + "type": "Phenotypes", + "dataset": dataset["Name"], + "search_terms_or": ( + # Very long URLs will cause + # errors. " ".join(_xref_ids) if len(_xref_ids) <= 100 else ""), - "search_terms_and": " ".join( - _search_terms).strip(), - "accession_id": "None", - "FormID": "searchResult" - }), - fragment=""))) + "search_terms_and": " ".join( + _search_terms).strip(), + "accession_id": "None", + "FormID": "searchResult" + }), + fragment=""))) except JobNotFound as _jnf: return render_template("jobs/job-not-found.html", job_id=job_id) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/recompute-means", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def recompute_means(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + """Compute/Recompute the means for phenotypes in a particular population.""" + _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + _job_id = uuid.uuid4() + _xref_ids = tuple(int(item.split("_")[-1]) + for item in request.form.getlist("selected-phenotypes")) + + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + command = [ + sys.executable, + "-u", + "-m", + "scripts.compute_phenotype_means", + app.config["SQL_URI"], + _jobs_db, + str(population["Id"]), + "--log-level", + _loglevel] + ( + ["--cross-ref-ids", ",".join(str(_id) for _id in _xref_ids)] + if len(_xref_ids) > 0 else + []) + logger.debug("%s.recompute_means: command (%s)", __name__, command) + + with sqlite3.connection(_jobs_db) as conn: + _job = gnlibs_jobs.launch_job( + gnlibs_jobs.initialise_job( + conn, + _job_id, + command, + "(re)compute-phenotype-means", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "success_handler": ( + "uploader.phenotypes.views." + "recompute_phenotype_means_success_handler") + }, + external_id=session.logged_in_user_id()), + _jobs_db, + Path(f"{uploads_dir(app)}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + return redirect(url_for("background-jobs.job_status", + job_id=_job["job_id"])) + + +def return_to_dataset_view_handler(job, msg: str): + """Handler for background jobs: Returns to `View Dataset` page.""" + flash(msg, "alert alert-success") + return redirect(url_for( + "species.populations.phenotypes.view_dataset", + species_id=job["metadata"]["species_id"], + population_id=job["metadata"]["population_id"], + dataset_id=job["metadata"]["dataset_id"], + job_id=job["job_id"])) + +def recompute_phenotype_means_success_handler(job): + """Handle loading new phenotypes into the database successfully.""" + return return_to_dataset_view_handler(job, "Means computed successfully!") + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/rerun-qtlreaper", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def rerun_qtlreaper(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + """(Re)run QTLReaper for phenotypes in a particular population.""" + _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + _job_id = uuid.uuid4() + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + + _workingdir = Path(app.config["SCRATCH_DIRECTORY"]).joinpath("qtlreaper") + _workingdir.mkdir(exist_ok=True) + command = [ + sys.executable, + "-u", + "-m", + "scripts.run_qtlreaper", + "--log-level", _loglevel, + app.config["SQL_URI"], + str(species["SpeciesId"]), + str(population["Id"]), + str(Path(app.config["GENOTYPE_FILES_DIRECTORY"]).joinpath( + "genotype")), + str(_workingdir) + ] + [ + str(_xref_id) for _xref_id in ( + int(item.split("_")[-1]) + for item in request.form.getlist("selected-phenotypes")) + ] + logger.debug("(Re)run QTLReaper: %s", command) + with sqlite3.connection(_jobs_db) as conn: + _job_id = uuid.uuid4() + _job = gnlibs_jobs.launch_job( + gnlibs_jobs.initialise_job( + conn, + _job_id, + command, + "(re)run-qtlreaper", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "success_handler": ( + "uploader.phenotypes.views." + "rerun_qtlreaper_success_handler") + }, + external_id=session.logged_in_user_id()), + _jobs_db, + Path(f"{uploads_dir(app)}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + return redirect(url_for("background-jobs.job_status", + job_id=_job["job_id"])) + return redirect(url_for( + "background-jobs.job_status", job_id=_job["job_id"])) + + +def rerun_qtlreaper_success_handler(job): + """Handle success (re)running QTLReaper script.""" + return return_to_dataset_view_handler(job, "QTLReaper ran successfully!") + + +def delete_phenotypes_success_handler(job): + """Handle success running the 'delete-phenotypes' script.""" + return return_to_dataset_view_handler( + job, "Phenotypes deleted successfully.") + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/delete", + methods=["GET", "POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def delete_phenotypes(# pylint: disable=[unused-argument, too-many-locals] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + """Delete the specified phenotype data.""" + _dataset_page = redirect(url_for( + "species.populations.phenotypes.view_dataset", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"])) + + def __handle_error__(resp): + flash( + "Error retrieving authorisation token. Phenotype deletion " + "failed. Please try again later.", + "alert alert-danger") + return _dataset_page + + _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with (database_connection(app.config["SQL_URI"]) as conn, + sqlite3.connection(_jobs_db) as jobsconn): + form = request.form + xref_ids = tuple(int(item) for item in set(form.getlist("xref_ids"))) + + match form.get("action"): + case "cancel": + return redirect(url_for( + "species.populations.phenotypes.view_dataset", + species_id=species["SpeciesId"], + population_id=population["Id"], + dataset_id=dataset["Id"])) + case "delete": + _loglevel = logging.getLevelName( + app.logger.getEffectiveLevel()).lower() + if form.get("confirm_delete_all_phenotypes", "") == "on": + _cmd = ["--delete-all"] + else: + # setup phenotypes xref_ids file + _xref_ids_file = Path( + app.config["SCRATCH_DIRECTORY"], + f"delete-phenotypes-{uuid.uuid4()}.txt") + with _xref_ids_file.open(mode="w", encoding="utf8") as ptr: + ptr.write("\n".join(str(_id) for _id in xref_ids)) + + _cmd = ["--xref_ids_file", str(_xref_ids_file)] + + _job_id = uuid.uuid4() + return request_token( + token_uri=urljoin( + oauth2client.authserver_uri(), "auth/token"), + user_id=session.user_details()["user_id"] + ).then( + lambda token: gnlibs_jobs.initialise_job( + jobsconn, + _job_id, + [ + sys.executable, + "-u", + "-m", + "scripts.phenotypes.delete_phenotypes", + "--log-level", _loglevel, + app.config["SQL_URI"], + str(species["SpeciesId"]), + str(population["Id"]), + str(dataset["Id"]), + app.config["AUTH_SERVER_URL"], + token["access_token"]] + _cmd, + "delete-phenotypes", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "success_handler": ( + "uploader.phenotypes.views." + "delete_phenotypes_success_handler") + }, + external_id=session.logged_in_user_id()) + ).then( + lambda _job: gnlibs_jobs.launch_job( + _job, + _jobs_db, + Path(f"{uploads_dir(app)}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + ).either(__handle_error__, proceed_to_job_status) + case _: + _phenos: tuple[dict, ...] = tuple() + if len(xref_ids) > 0: + _phenos = dataset_phenotypes( + conn, population["Id"], dataset["Id"], xref_ids=xref_ids) + + return render_template( + "phenotypes/confirm-delete-phenotypes.html", + species=species, + population=population, + dataset=dataset, + phenotypes=_phenos) |
