diff options
Diffstat (limited to 'wqflask/wqflask/partial_correlations_views.py')
-rw-r--r-- | wqflask/wqflask/partial_correlations_views.py | 372 |
1 files changed, 0 insertions, 372 deletions
"""Views for running partial correlations and presenting their results.

The views collect the user's selection of traits, validate it, forward the
request to the API server at ``GN_SERVER_URL`` and render whatever comes back.
"""
import json
import math
import requests
from functools import reduce
from typing import Union, Tuple
from urllib.parse import urljoin

from flask import (
    flash,
    request,
    url_for,
    redirect,
    current_app,
    render_template)

from wqflask import app
from utility.tools import get_setting, GN_SERVER_URL
from wqflask.database import database_connection
from gn3.db.partial_correlations import traits_info


def _placeholders(count):
    """Return `count` comma-separated `%s` placeholders for a SQL `IN` list."""
    return ", ".join(["%s"] * count)


def _freeze_target_databases(conn, table, groups, threshold):
    """
    Fetch the `FullName`/`Name` pairs from the given `*Freeze` table.

    `table` is only ever one of the hard-coded table names passed in by the
    functions in this module, never user input, which is why it is safe to
    interpolate it into the query. Every value is passed as a query parameter.

    Returns a tuple of `{"description": ..., "value": ...}` dicts, one per row,
    or an empty tuple when `groups` is empty or nothing matches.
    """
    if not groups:
        # An empty list would render as `IN ()`, which is not valid SQL.
        return tuple()

    query = (
        f"SELECT {table}.FullName,{table}.Name "
        f"FROM {table}, InbredSet "
        f"WHERE {table}.InbredSetId = InbredSet.Id "
        f"AND InbredSet.Name IN ({_placeholders(len(groups))}) "
        f"AND {table}.public > %s")
    with conn.cursor() as cursor:
        cursor.execute(query, tuple(groups) + (threshold,))
        rows = cursor.fetchall()

    return tuple(
        dict(zip(("description", "value"), row)) for row in (rows or ()))


def publish_target_databases(conn, groups, threshold):
    """
    List the `PublishFreeze` datasets for `groups` whose `public` value is
    greater than `threshold`, as `{"description", "value"}` dicts.
    """
    return _freeze_target_databases(conn, "PublishFreeze", groups, threshold)


def geno_target_databases(conn, groups, threshold):
    """
    List the `GenoFreeze` datasets for `groups` whose `public` value is
    greater than `threshold`, as `{"description", "value"}` dicts.
    """
    return _freeze_target_databases(conn, "GenoFreeze", groups, threshold)


def probeset_target_databases(conn, groups, threshold):
    """
    List the `ProbeSetFreeze` datasets for `groups`, organised by tissue.

    Returns a tuple with one single-key dict per row of the `Tissue` table
    (ordered by tissue name). The key is the tissue name and the value is a
    tuple of `{"value", "description"}` dicts for the datasets of that tissue,
    possibly empty. An empty tuple is returned when `groups` is empty, when
    there are no tissues, or when no dataset matches.
    """
    if not groups:
        # No group clauses would render as `AND ()`, which is not valid SQL.
        return tuple()

    with conn.cursor() as cursor:
        cursor.execute("SELECT Id, Name FROM Tissue order by Name")
        tissue_res = cursor.fetchall()
        if not tissue_res:
            return tuple()

        tissue_ids = tuple(row[0] for row in tissue_res)
        groups_clauses = ["InbredSet.Name like %s"] * len(groups)
        query = (
            "SELECT ProbeFreeze.TissueId, ProbeSetFreeze.FullName, "
            "ProbeSetFreeze.Name "
            "FROM ProbeSetFreeze, ProbeFreeze, InbredSet "
            "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
            "AND ProbeFreeze.TissueId IN "
            f"({_placeholders(len(tissue_ids))}) "
            "AND ProbeSetFreeze.public > %s "
            "AND ProbeFreeze.InbredSetId = InbredSet.Id "
            f"AND ({' OR '.join(groups_clauses)}) "
            "ORDER BY ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId")
        cursor.execute(query, tissue_ids + (threshold,) + tuple(groups))
        db_res = cursor.fetchall()

    if not db_res:
        return tuple()

    # Group the datasets by tissue in a single pass, keeping the query's
    # ordering within each tissue, rather than rescanning every dataset row
    # for every tissue.
    by_tissue = {}
    for tissue_id, description, value in db_res:
        by_tissue.setdefault(tissue_id, []).append(
            {"value": value, "description": description})

    return tuple(
        {tissue_name: tuple(by_tissue.get(tissue_id, ()))}
        for tissue_id, tissue_name in tissue_res)


def target_databases(conn, traits, threshold):
    """
    Retrieves the names of possible target databases from the database.

    The groups are read from the `traits_info` of the given traits, then the
    publish, geno and probeset databases of those groups are concatenated, in
    that order. Returns an empty tuple when no traits are given.
    """
    if not traits:
        return tuple()

    trait_info = traits_info(
        conn, threshold,
        tuple(f"{trait['dataset']}::{trait['trait_name']}" for trait in traits))
    groups = tuple(set(row["db"]["group"] for row in trait_info))
    return (
        publish_target_databases(conn, groups, threshold) +
        geno_target_databases(conn, groups, threshold) +
        probeset_target_databases(conn, groups, threshold))


def _with_error(args, message):
    """Return a copy of `args` with `message` appended to its `errors` tuple."""
    return {**args, "errors": args.get("errors", tuple()) + (message,)}


def primary_error(args):
    """Record an error unless exactly one primary trait was selected."""
    if len(args["primary_trait"]) != 1:
        return _with_error(
            args, "You must provide one, and only one primary trait")
    return args


def controls_error(args):
    """Record an error unless between one and three control traits were selected."""
    if not 1 <= len(args["control_traits"]) <= 3:
        return _with_error(
            args,
            ("You must provide at least one control trait, and a maximum "
             "of three control traits"))
    return args


def target_traits_error(args, with_target_traits):
    """
    Record an error when target traits are required (`with_target_traits`)
    but none were selected.
    """
    if with_target_traits and not args.get("target_traits"):
        return _with_error(args, "You must provide at least one target trait")
    return args


def target_db_error(args, with_target_db: bool):
    """
    Record an error when a target database is required (`with_target_db`)
    but is missing or empty.
    """
    # `.get`: the key is absent altogether when the form did not send it.
    if with_target_db and not args.get("target_db"):
        return _with_error(args, "The target database must be provided")
    return args


def method_error(args):
    """Record an error unless `method` is one of the known correlation methods."""
    methods = (
        "pearson's r", "spearman's rho",
        "genetic correlation, pearson's r",
        "genetic correlation, spearman's rho",
        "sgo literature correlation",
        "tissue correlation, pearson's r",
        "tissue correlation, spearman's rho")
    # `.get`: the key is absent altogether when the form did not send it.
    method = args.get("method")
    if not method or method.lower() not in methods:
        return _with_error(args, "Invalid correlation method provided")
    return args


def criteria_error(args):
    """Record an error unless `criteria` can be read as an integer."""
    try:
        int(args.get("criteria", "invalid"))
    except (TypeError, ValueError):
        return _with_error(args, "Invalid return number provided")
    return args


def errors(args, with_target_db: bool):
    """
    Run every validator over `args`.

    Returns a copy of `args` carrying `with_target_db`, plus an `errors` tuple
    of messages when at least one check failed. Target traits are only
    required when no target database is used.
    """
    checked = primary_error(args)
    checked = controls_error(checked)
    checked = target_db_error(checked, with_target_db)
    checked = target_traits_error(checked, not with_target_db)
    checked = method_error(checked)
    checked = criteria_error(checked)
    return {**checked, "with_target_db": with_target_db}


def __classify_args(acc, item):
    """
    Reducer that sorts one `(key, value)` form item into `acc`.

    Trait selections are recognised by the prefix of their *value*
    (`primary_`, `controls_`, `targets_`) and collected as tuples of the whole
    item; `target_db`, `method` and `criteria` are recognised by key. Anything
    else is ignored.
    """
    key, value = item
    for prefix, bucket in (
            ("primary_", "primary_trait"),
            ("controls_", "control_traits"),
            ("targets_", "target_traits")):
        if value.startswith(prefix):
            return {**acc, bucket: acc.get(bucket, tuple()) + (item,)}
    if key in ("target_db", "method", "criteria"):
        return {**acc, key: value}
    return acc


def __build_args(raw_form, traits):
    """
    Build the partial correlations arguments from the submitted form.

    The prefixed selections gathered by `__classify_args` are replaced with
    the matching trait dicts from `traits`. The three trait keys are always
    present in the result, as empty lists when nothing was selected, so that
    the validators can report the problem.
    """
    args = reduce(__classify_args, raw_form.items(), {})

    def __selected(bucket, prefix):
        # Strip the prefix off each selected value to recover the trait name.
        names = {
            item[1][len(prefix):] for item in args.get(bucket, tuple())}
        return [trait for trait in traits if trait["trait_name"] in names]

    return {
        **args,
        "primary_trait": __selected("primary_trait", "primary_"),
        "control_traits": __selected("control_traits", "controls_"),
        "target_traits": __selected("target_traits", "targets_")
    }


def parse_trait(trait_str):
    """
    Split a `|||`-separated trait string into a dict.

    Fields are assigned positionally; keys for which the string has no field
    are simply absent from the result.
    """
    return dict(zip(
        ("trait_name", "dataset", "description", "symbol", "location", "mean",
         "lrs", "lrs_location"),
        trait_str.strip().split("|||")))


def response_error_message(response):
    """Map the status code of a failed API response to a user-facing message."""
    error_messages = {
        404: ("We could not connect to the API server at this time. "
              "Try again later."),
        500: ("The API server experienced a problem. We will be working on a "
              "fix. Please try again later.")
    }
    return error_messages.get(
        response.status_code,
        "General API server error!!")


def render_error(error_message, command_id = None):
    """Render the partial correlations error page with the given message."""
    return render_template(
        "partial_correlations/pcorrs_error.html",
        message = error_message,
        command_id = command_id)


def __format_number(num):
    """
    Format a number for display in the results templates.

    `None` and NaN become the empty string, magnitudes up to 1.04E-4 use
    scientific notation, and everything else is shown with five decimals.
    """
    if num is None or math.isnan(num):
        return ""
    if abs(num) <= 1.04E-4:
        return f"{num:.2e}"
    return f"{num:.5f}"


def _render_target_traits_results(results):
    """
    Render the results page for a run against selected target traits, with
    the correlations sorted by their partial correlation p-value.
    """
    return render_template(
        "partial_correlations/pcorrs_results_with_target_traits.html",
        primary = results["primary_trait"],
        controls = results["control_traits"],
        pcorrs = sorted(
            results["correlations"],
            key = lambda item: item["partial_corr_p_value"]),
        method = results["method"],
        enumerate = enumerate,
        format_number = __format_number)


def handle_200_response(response):
    """
    Turn the JSON body of a successful API response into a page.

    A queued computation redirects (303) to the polling view, a finished one
    is rendered, and anything else is shown as an error.
    """
    if response.get("queued", False):
        return redirect(
            url_for(
                "poll_partial_correlation_results",
                command_id=response["results"]),
            code=303)
    if response["status"] == "success":
        return _render_target_traits_results(response["results"]["results"])
    return render_error(response["results"])


def handle_response(response):
    """Render the error page for a non-200 API response, else its results."""
    if response.status_code != 200:
        return render_template(
            "partial_correlations/pcorrs_error.html",
            message = response_error_message(response))
    return handle_200_response(response.json())


def _request_partial_correlations(args):
    """
    Send validated `args` to the API server and return the rendered response.

    When `args` carries validation errors nothing is sent: each error is
    flashed instead and `None` is returned, so the caller can show the
    selection page again.
    """
    if args.get("errors"):
        for error in args["errors"]:
            flash(error, "alert-danger")
        return None

    post_data = {
        **args,
        # The validators guarantee exactly one primary trait at this point.
        "primary_trait": args["primary_trait"][0],
        "with_target_db": args["with_target_db"]
    }
    return handle_response(requests.post(
        url=urljoin(GN_SERVER_URL, "correlation/partial"),
        json=post_data))


@app.route("/partial_correlations", methods=["POST"])
def partial_correlations():
    """
    Entry point of the partial correlations feature.

    Depending on the `submit` value, runs the computation against the selected
    target traits or against a target database. Without a recognised `submit`
    value, or when validation fails, the operations selection page is shown.
    """
    form = request.form
    # A missing `trait_list` is treated as an empty selection rather than
    # crashing on `None.split`; blank entries carry no trait and are skipped.
    trait_list_str = form.get("trait_list") or ""
    traits = tuple(
        parse_trait(trait) for trait in trait_list_str.split(";;;")
        if trait.strip())

    submit = form.get("submit")
    response = None
    if submit in ("with_target_pearsons", "with_target_spearmans"):
        method = "pearsons" if "pearsons" in submit else "spearmans"
        response = _request_partial_correlations({
            **errors(__build_args(form, traits), with_target_db=False),
            "method": method
        })
    elif submit == "Run Partial Correlations":
        response = _request_partial_correlations(
            errors(__build_args(form, traits), with_target_db=True))

    if response is not None:
        return response

    with database_connection(get_setting("SQL_URI")) as conn:
        target_dbs = target_databases(conn, traits, threshold=0)
        return render_template(
            "partial_correlations/pcorrs_select_operations.html",
            trait_list_str=trait_list_str,
            traits=traits,
            target_dbs=target_dbs)


def process_pcorrs_command_output(result):
    """
    Render the output of a finished partial correlations command.

    Successful results are rendered with the target-traits template when
    their `dataset_type` is "NOT SET YET", and with the presentation template
    otherwise. Any other status is rendered as an error, so the view always
    gets a response back.
    """
    if result["status"] == "success":
        results = result["results"]
        if results["dataset_type"] == "NOT SET YET":
            return _render_target_traits_results(results)

        return render_template(
            "partial_correlations/pcorrs_results_presentation.html",
            primary=results["primary_trait"],
            controls=results["control_traits"],
            correlations=results["correlations"],
            dataset_type=results["dataset_type"],
            method=results["method"],
            enumerate = enumerate,
            format_number=__format_number)
    if result["status"] == "error":
        return render_error(
            f"({result['error_type']}: {result['message']})")
    # Returning `None` from a view is an error in Flask; report unexpected
    # statuses with the generic failure message instead.
    return render_error(
        "We messed up, and the computation failed due to a system "
        "error.")

@app.route("/partial_correlations/<command_id>", methods=["GET"])
def poll_partial_correlation_results(command_id):
    """
    Report on the state of the queued computation identified by `command_id`.

    Asks the API server for the state of the command, then:
    - renders the results when the command succeeded,
    - renders the error page when the request or the computation failed,
    - renders the polling page while the computation is still running.
    """
    system_error = (
        "We messed up, and the computation failed due to a system "
        "error.")
    response = requests.get(
        url=urljoin(GN_SERVER_URL, f"async_commands/state/{command_id}"))
    if response.status_code != 200:
        return render_error(system_error, command_id)

    data = response.json()
    raw_result = data.get("result")
    # No result yet means the command is still running. Parse the result once
    # and reuse it below.
    result = json.loads(raw_result) if raw_result else {"status": "computing"}
    if str(result.get("status", "")).lower() in ("error", "exception"):
        return render_error(system_error, command_id)

    if data.get("status") == "success":
        if not raw_result:
            # A command that finished without output cannot be presented, and
            # polling again would never end.
            return render_error(system_error, command_id)
        return process_pcorrs_command_output(result)

    return render_template(
        "partial_correlations/pcorrs_poll_results.html",
        command_id = command_id)