-rw-r--r--   scripts/cli_parser.py                                         |  24
-rw-r--r--   scripts/compute_phenotype_means.py                            | 101
-rw-r--r--   scripts/load_phenotypes_to_db.py                              |   2
-rw-r--r--   uploader/background_jobs.py                                   |   5
-rw-r--r--   uploader/phenotypes/views.py                                  |  77
-rw-r--r--   uploader/route_utils.py                                       |  12
-rw-r--r--   uploader/templates/background-jobs/default-success-page.html  |  17
-rw-r--r--   uploader/templates/phenotypes/view-dataset.html               |  32
8 files changed, 243 insertions, 27 deletions
diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py
index 0c91c5e..bf39731 100644
--- a/scripts/cli_parser.py
+++ b/scripts/cli_parser.py
@@ -3,6 +3,20 @@
 from uuid import UUID
 from typing import Optional
 from argparse import ArgumentParser
+
+def add_logging_option(parser: ArgumentParser) -> ArgumentParser:
+    """Add optional log-level option"""
+    parser.add_argument(
+        "--log-level",
+        "--loglevel",
+        type=str,
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL",
+                 "debug", "info", "warning", "error", "critical"],
+        help="The severity of events to track with the logger.")
+    return parser
+
+
 def init_cli_parser(program: str, description: Optional[str] = None) -> ArgumentParser:
     """Initialise the CLI arguments parser."""
     parser = ArgumentParser(prog=program, description=description)
@@ -19,14 +33,8 @@ def init_cli_parser(program: str, description: Optional[str] = None) -> Argument
         type=int,
         default=86400,
         help="How long to keep any redis keys around.")
-    parser.add_argument(
-        "--loglevel",
-        type=str,
-        default="INFO",
-        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL",
-                 "debug", "info", "warning", "error", "critical"],
-        help="The severity of events to track with the logger.")
-    return parser
+    return add_logging_option(parser)
+
 
 def add_global_data_arguments(parser: ArgumentParser) -> ArgumentParser:
     """Add the global (present in nearly ALL scripts) CLI arguments."""
diff --git a/scripts/compute_phenotype_means.py b/scripts/compute_phenotype_means.py
new file mode 100644
index 0000000..ef2fabc
--- /dev/null
+++ b/scripts/compute_phenotype_means.py
@@ -0,0 +1,101 @@
+"""Compute phenotype means."""
+import sys
+import logging
+from pathlib import Path
+from typing import TypeVar
+from argparse import Namespace, ArgumentParser
+
+import MySQLdb
+
+from gn_libs import mysqldb
+from uploader import setup_modules_logging
+
+from .cli_parser import add_logging_option
+from .load_phenotypes_to_db import update_means
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    encoding="utf-8",
+    format="%(asctime)s - %(name)s - %(levelname)s — %(message)s",
+    level=logging.INFO)
+
+
+def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]:
+    """Fetch a population's cross-reference IDs."""
+    logger.debug("Fetching the xref IDs.")
+    with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor:
+        query = "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s"
+        cursor.execute(query, {"population_id": population_id})
+        return tuple(int(row["Id"]) for row in cursor.fetchall())
+
+
+def run(args) -> int:
+    """Run the script."""
+    logger.debug("Running the script!")
+    with mysqldb.database_connection(args.db_uri) as mariadb_conn:
+        xref_ids = args.cross_ref_ids or fetch_xref_id(mariadb_conn, args.population_id)
+        if len(xref_ids):
+            update_means(mariadb_conn,
+                         args.population_id,
+                         xref_ids)
+            logger.debug("Successfully computed means for %02d phenotypes.",
+                         len(xref_ids))
+            return 0
+        _reasons = (
+            f"no population exists with the ID {args.population_id}",
+            "the population exists but it has no phenotypes linked to it yet")
+        logger.error(
+            "No cross-reference IDs to run against. Likely causes are: %s",
+            " OR ".join(_reasons) + ".")
+        return 1
+
+
+T = TypeVar("T")
+def comma_separated_list(val: str, itemstype: T = str) -> tuple[T, ...]:
+    """Convert val into a list of items of type 'itemstype'."""
+    return tuple(itemstype(item.strip()) for item in val.split(","))
+
+
+def comma_separated_list_of_integers(val: str) -> tuple[int, ...]:
+    """Convert 'val' into list of items of type 'int'."""
+    return comma_separated_list(val, int)
+
+
+if __name__ == "__main__":
+    def parse_args() -> Namespace:
+        """Define and parse the CLI parsers accepted by this script."""
+        parser = ArgumentParser(
+            "compute-phenotype-means",
+            description="Compute/Recompute the phenotype means.")
+        parser.add_argument("db_uri",
+                            metavar="db-uri",
+                            type=str,
+                            help="MariaDB/MySQL connection URL")
+        parser.add_argument("jobs_db_path",
+                            metavar="jobs-db-path",
+                            type=Path,
+                            help="Path to jobs' SQLite database.")
+        parser.add_argument("population_id",
+                            metavar="population-id",
+                            type=int,
+                            help=("Identifier for the InbredSet group/"
+                                  "population to run means against."))
+        ## Optional arguments
+        parser = add_logging_option(parser)
+        parser.add_argument(
+            "--cross-ref-ids",
+            type=comma_separated_list_of_integers,
+            help=("Provide cross-reference IDs to narrow the number of "
+                  "phenotypes that the means are computed against."),
+            default=[])
+
+        return parser.parse_args()
+
+    def main() -> int:
+        """compute-phenotype-means: Entry-point function."""
+        args = parse_args()
+        logger.setLevel(getattr(logging, args.log_level.upper()))
+        setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",))
+        return run(args)
+
+    sys.exit(main())
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 9158307..e449b82 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -414,6 +414,7 @@ def update_means(
         xref_ids: tuple[int, ...]
 ):
     """Compute the means from the data and update them in the database."""
+    logger.info("Computing means for %02d phenotypes.", len(xref_ids))
     query = (
         "UPDATE PublishXRef SET mean = "
         "(SELECT AVG(value) FROM PublishData"
@@ -426,6 +427,7 @@ def update_means(
         batch = take(_xref_iterator, 10000)
         if len(batch) == 0:
             break
+        logger.info("\tComputing means for batch of %02d phenotypes.", len(batch))
         cursor.executemany(
             query,
             tuple({
diff --git a/uploader/background_jobs.py b/uploader/background_jobs.py
index dc9f837..d33c498 100644
--- a/uploader/background_jobs.py
+++ b/uploader/background_jobs.py
@@ -56,7 +56,7 @@ def register_job_handlers(job: str):
         return getattr(module, _parts[-1])
 
     metadata = job["metadata"]
-    if metadata["success_handler"]:
+    if metadata.get("success_handler"):
         _success_handler = __load_handler__(metadata["success_handler"])
     try:
         _error_handler = __load_handler__(metadata["error_handler"])
@@ -76,8 +76,7 @@ def handler(job: dict, handler_type: str) -> HandlerType:
     ).get(handler_type)
     if bool(_handler):
         return _handler(job)
-    raise Exception(# pylint: disable=[broad-exception-raised]
-        f"No '{handler_type}' handler registered for job type: {_job_type}")
+    return render_template("background-jobs/default-success-page.html", job=job)
 
 
 error_handler = partial(handler, handler_type="error")
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index ac36ec8..7002ccd 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -60,6 +60,7 @@ from .models import (dataset_by_id,
                      datasets_by_population,
                      phenotype_publication_data)
 
+logger = logging.getLogger(__name__)
 phenotypesbp = Blueprint("phenotypes", __name__)
 render_template = make_template_renderer("phenotypes")
 
@@ -233,11 +234,6 @@ def view_phenotype(# pylint: disable=[unused-argument]
         population["Id"],
         dataset["Id"],
         xref_id)
-    def __non_empty__(value) -> bool:
-        if isinstance(value, str):
-            return value.strip() != ""
-        return bool(value)
-
     return render_template(
         "phenotypes/view-phenotype.html",
         species=species,
@@ -1008,3 +1004,74 @@ def load_data_success(
                 fragment="")))
     except JobNotFound as _jnf:
         return render_template("jobs/job-not-found.html", job_id=job_id)
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/recompute-means",
+    methods=["POST"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def recompute_means(# pylint: disable=[unused-argument]
+        species: dict,
+        population: dict,
+        dataset: dict,
+        **kwargs
+):
+    """Compute/Recompute the means for phenotypes in a particular population."""
+    _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+    _job_id = uuid.uuid4()
+    _xref_ids = tuple(int(item.split("_")[-1])
+                      for item in request.form.getlist("selected-phenotypes"))
+
+    _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()
+    command = [
+        sys.executable,
+        "-u",
+        "-m",
+        "scripts.compute_phenotype_means",
+        app.config["SQL_URI"],
+        _jobs_db,
+        str(population["Id"]),
+        "--log-level",
+        _loglevel] + (
+            ["--cross-ref-ids", ",".join(str(_id) for _id in _xref_ids)]
+            if len(_xref_ids) > 0 else
+            [])
+    logger.debug("%s.recompute_means: command (%s)", __name__, command)
+
+    with sqlite3.connection(_jobs_db) as conn:
+        _job = gnlibs_jobs.launch_job(
+            gnlibs_jobs.initialise_job(
+                conn,
+                _job_id,
+                command,
+                "(re)compute-phenotype-means",
+                extra_meta={
+                    "species_id": species["SpeciesId"],
+                    "population_id": population["Id"],
+                    "dataset_id": dataset["Id"],
+                    "success_handler": (
+                        "uploader.phenotypes.views."
+                        "recompute_phenotype_means_success_handler")
+                }),
+            _jobs_db,
+            Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"),
+            worker_manager="gn_libs.jobs.launcher",
+            loglevel=_loglevel)
+    return redirect(url_for("background-jobs.job_status",
+                            job_id=_job["job_id"]))
+
+
+def recompute_phenotype_means_success_handler(job):
+    """Handle successful (re)computation of phenotype means."""
+    flash("Means computed successfully!", "alert alert-success")
+    return redirect(url_for(
+        "species.populations.phenotypes.view_dataset",
+        species_id=job["metadata"]["species_id"],
+        population_id=job["metadata"]["population_id"],
+        dataset_id=job["metadata"]["dataset_id"],
+        job_id=job["job_id"]))
diff --git a/uploader/route_utils.py b/uploader/route_utils.py
index 63b2852..53247e6 100644
--- a/uploader/route_utils.py
+++ b/uploader/route_utils.py
@@ -1,5 +1,5 @@
 """Generic routing utilities."""
-import json
+import logging
 from json.decoder import JSONDecodeError
 
 from flask import (flash,
@@ -15,6 +15,8 @@ from uploader.datautils import base64_encode_dict, base64_decode_to_dict
 from uploader.population.models import (populations_by_species,
                                         population_by_species_and_id)
 
+logger = logging.getLogger(__name__)
+
 def generic_select_population(
         # pylint: disable=[too-many-arguments, too-many-positional-arguments]
         species: dict,
@@ -56,9 +58,9 @@ def redirect_to_next(default: dict):
     assert "uri" in default, "You must provide at least the 'uri' value."
     try:
         next_page = base64_decode_to_dict(request.args.get("next"))
-        return redirect(url_for(
-            next_page["uri"],
-            **{key:value for key,value in next_page.items()}))
+        _uri = next_page["uri"]
+        next_page.pop("uri")
+        return redirect(url_for(_uri, **next_page))
     except (TypeError, JSONDecodeError) as _err:
         logger.debug("We could not decode the next value '%s'",
                      next_page,
@@ -66,7 +68,7 @@ def redirect_to_next(default: dict):
 
     return redirect(url_for(
         default["uri"],
-        **{key:value for key,value in default.items()}))
+        **{key:value for key,value in default.items() if key != "uri"}))
 
 
 def build_next_argument(uri: str, **kwargs) -> str:
diff --git a/uploader/templates/background-jobs/default-success-page.html b/uploader/templates/background-jobs/default-success-page.html
new file mode 100644
index 0000000..5732456
--- /dev/null
+++ b/uploader/templates/background-jobs/default-success-page.html
@@ -0,0 +1,17 @@
+{%extends "phenotypes/base.html"%}
+{%from "flash_messages.html" import flash_all_messages%}
+
+{%block title%}Background Jobs: Success{%endblock%}
+
+{%block pagetitle%}Background Jobs: Success{%endblock%}
+
+{%block contents%}
+{{flash_all_messages()}}
+
+<div class="row">
+  <p>Job <strong>{{job.job_id}}</strong>,
+    {%if job.get("metadata", {}).get("job-type")%}
+    of type '<em>{{job.metadata["job-type"]}}</em>'
+    {%endif%} completed successfully.</p>
+</div>
+{%endblock%}
diff --git a/uploader/templates/phenotypes/view-dataset.html b/uploader/templates/phenotypes/view-dataset.html
index 306dcce..6a261fc 100644
--- a/uploader/templates/phenotypes/view-dataset.html
+++ b/uploader/templates/phenotypes/view-dataset.html
@@ -46,12 +46,32 @@
   </div>
 
   <div class="row">
-    <p><a href="{{url_for('species.populations.phenotypes.add_phenotypes',
-                          species_id=species.SpeciesId,
-                          population_id=population.Id,
-                          dataset_id=dataset.Id)}}"
-          title="Add a bunch of phenotypes"
-          class="btn btn-primary">Add phenotypes</a></p>
+    <div class="col">
+      <a href="{{url_for('species.populations.phenotypes.add_phenotypes',
+                         species_id=species.SpeciesId,
+                         population_id=population.Id,
+                         dataset_id=dataset.Id)}}"
+         title="Add a bunch of phenotypes"
+         class="btn btn-primary">Add phenotypes</a>
+    </div>
+
+    <div class="col">
+      <form id="frm-recompute-phenotype-means"
+            method="POST"
+            action="{{url_for(
+                    'species.populations.phenotypes.recompute_means',
+                    species_id=species['SpeciesId'],
+                    population_id=population['Id'],
+                    dataset_id=dataset['Id'])}}"
+            class="d-flex flex-row align-items-center flex-wrap"
+            style="display: inline;">
+        <input type="submit"
+               title="Compute/Recompute the means for all phenotypes."
+               class="btn btn-info"
+               value="(rec/c)ompute means"
+               id="submit-frm-recompute-phenotype-means" />
+      </form>
+    </div>
   </div>
 
   <div class="row">
