| author | BonfaceKilz | 2022-01-05 17:01:24 +0300 |
|---|---|---|
| committer | GitHub | 2022-01-05 17:01:24 +0300 |
| commit | 0b1643e87cf4303db3673dcf3cf240aeb4d518cb (patch) | |
| tree | 62ed5cae507303a469c040eb47a8b5b70b3a1a93 /gn3 | |
| parent | ac28fb48e4e3197de6bfeef332198b70689837c9 (diff) | |
| parent | af52afa4318feadfa3cd1cc4dcdd3d86907f68a4 (diff) | |
| download | genenetwork3-0b1643e87cf4303db3673dcf3cf240aeb4d518cb.tar.gz | |
Merge branch 'main' into fix/check-for-duplicates-before-deletions-or-insertions
Diffstat (limited to 'gn3')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | gn3/api/correlation.py | 21 |
| -rw-r--r-- | gn3/computations/correlations.py | 5 |
| -rw-r--r-- | gn3/computations/partial_correlations.py | 93 |
| -rw-r--r-- | gn3/db/datasets.py | 12 |
4 files changed, 106 insertions, 25 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index e936eaf..1caf31f 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -1,7 +1,9 @@
 """Endpoints for running correlations"""
+import json
 from flask import jsonify
 from flask import Blueprint
 from flask import request
+from flask import make_response
 
 from gn3.computations.correlations import compute_all_sample_correlation
 from gn3.computations.correlations import compute_all_lit_correlation
@@ -87,15 +89,28 @@ def compute_tissue_corr(corr_method="pearson"):
 
 @correlation.route("/partial", methods=["POST"])
 def partial_correlation():
+    """API endpoint for partial correlations."""
     def trait_fullname(trait):
         return f"{trait['dataset']}::{trait['name']}"
 
+    class OutputEncoder(json.JSONEncoder):
+        """
+        Class to encode output into JSON, for objects which the default
+        json.JSONEncoder class does not have default encoding for.
+        """
+        def default(self, obj):
+            if isinstance(obj, bytes):
+                return str(obj, encoding="utf-8")
+            return json.JSONEncoder.default(self, obj)
+
     args = request.get_json()
     conn, _cursor_object = database_connector()
     corr_results = partial_correlations_entry(
         conn, trait_fullname(args["primary_trait"]),
         tuple(trait_fullname(trait) for trait in args["control_traits"]),
         args["method"], int(args["criteria"]), args["target_db"])
-    return make_response(
-        jsonify(corr_results),
-        400)
+    response = make_response(
+        json.dumps(corr_results, cls=OutputEncoder).replace(": NaN", ": null"),
+        400 if "error" in corr_results.keys() else 200)
+    response.headers["Content-Type"] = "application/json"
+    return response
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index d38946e..1b4b3a4 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -7,6 +7,7 @@ from typing import List
 from typing import Tuple
 from typing import Optional
 from typing import Callable
+from typing import Generator
 
 import scipy.stats
 import pingouin as pg
@@ -79,7 +80,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
             zip(*list(normalize_values(trait_vals, target_samples_vals))))
         num_overlap = len(normalized_traits_vals)
     except ValueError:
-        return
+        return None
 
     if num_overlap > 5:
 
@@ -106,7 +107,7 @@ package :not packaged in guix
 
 
 def filter_shared_sample_keys(this_samplelist,
-                              target_samplelist) -> Tuple[List, List]:
+                              target_samplelist) -> Generator:
     """Given primary and target sample-list for two base and target trait select
     filter the values using the shared keys
 
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 13c411a..984c15a 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -217,7 +217,7 @@ def good_dataset_samples_indexes(
 def partial_correlations_fast(# pylint: disable=[R0913, R0914]
         samples, primary_vals, control_vals, database_filename,
         fetched_correlations, method: str, correlation_type: str) -> Tuple[
-            float, Tuple[float, ...]]:
+            int, Tuple[float, ...]]:
     """
     Computes partial correlation coefficients using data from a CSV file.
 
@@ -350,7 +350,9 @@ def compute_partial(
 def partial_correlations_normal(# pylint: disable=R0913
         primary_vals, control_vals, input_trait_gene_id, trait_database,
         data_start_pos: int, db_type: str, method: str) -> Tuple[
-            float, Tuple[float, ...]]:
+            int, Tuple[Union[
+                Tuple[str, int, float, float, float, float], None],
+                       ...]]:#Tuple[float, ...]
     """
     Computes the correlation coefficients.
 
@@ -485,7 +487,7 @@ def literature_correlation_by_list(
 def tissue_correlation_by_list(
         conn: Any, primary_trait_symbol: str,
         tissue_probeset_freeze_id: int,
-        method: str, trait_list: Tuple[dict]) -> Tuple[dict]:
+        method: str, trait_list: Tuple[dict]) -> Tuple[dict, ...]:
     """
     This is a migration of the
     `web.webqtl.correlation.CorrelationPage.getTissueCorrelationByList`
@@ -508,7 +510,7 @@ def tissue_correlation_by_list(
     primary_trait_value = prim_trait_symbol_value_dict[
         primary_trait_symbol.lower()]
     gene_symbol_list = tuple(
-        trait for trait in trait_list if "symbol" in trait.keys())
+        trait["symbol"] for trait in trait_list if "symbol" in trait.keys())
     symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait(
         gene_symbol_list, tissue_probeset_freeze_id, conn)
     return tuple(
@@ -526,6 +528,54 @@ def tissue_correlation_by_list(
         } for trait in trait_list)
     return trait_list
 
+def trait_for_output(trait):
+    """
+    Process a trait for output.
+
+    Removes a lot of extraneous data from the trait, that is not needed for
+    the display of partial correlation results.
+    This function also removes all key-value pairs, for which the value is
+    `None`, because it is a waste of network resources to transmit the
+    key-value pair just to indicate it does not exist.
+    """
+    trait = {
+        "trait_type": trait["trait_type"],
+        "dataset_name": trait["db"]["dataset_name"],
+        "dataset_type": trait["db"]["dataset_type"],
+        "group": trait["db"]["group"],
+        "trait_fullname": trait["trait_fullname"],
+        "trait_name": trait["trait_name"],
+        "symbol": trait.get("symbol"),
+        "description": trait.get("description"),
+        "pre_publication_description": trait.get(
+            "pre_publication_description"),
+        "post_publication_description": trait.get(
+            "post_publication_description"),
+        "original_description": trait.get(
+            "original_description"),
+        "authors": trait.get("authors"),
+        "year": trait.get("year"),
+        "probe_target_description": trait.get(
+            "probe_target_description"),
+        "chr": trait.get("chr"),
+        "mb": trait.get("mb"),
+        "geneid": trait.get("geneid"),
+        "homologeneid": trait.get("homologeneid"),
+        "noverlap": trait.get("noverlap"),
+        "partial_corr": trait.get("partial_corr"),
+        "partial_corr_p_value": trait.get("partial_corr_p_value"),
+        "corr": trait.get("corr"),
+        "corr_p_value": trait.get("corr_p_value"),
+        "rank_order": trait.get("rank_order"),
+        "delta": (
+            None if trait.get("partial_corr") is None
+            else (trait.get("partial_corr") - trait.get("corr"))),
+        "l_corr": trait.get("l_corr"),
+        "tissue_corr": trait.get("tissue_corr"),
+        "tissue_p_value": trait.get("tissue_p_value")
+    }
+    return {key: val for key, val in trait.items() if val is not None}
+
 def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
         conn: Any, primary_trait_name: str,
         control_trait_names: Tuple[str, ...], method: str,
@@ -669,19 +719,30 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
 
 
     def __make_sorter__(method):
-        def __sort_6__(row):
-            return row[6]
-
-        def __sort_3__(row):
+        def __compare_lit_or_tiss_correlation_values_(row):
+            # Index Content
+            # 0     trait name
+            # 1     N
+            # 2     partial correlation coefficient
+            # 3     p value of partial correlation
+            # 6     literature/tissue correlation value
+            return (row[6], row[3])
+
+        def __compare_partial_correlation_p_values__(row):
+            # Index Content
+            # 0     trait name
+            # 1     partial correlation coefficient
+            # 2     N
+            # 3     p value of partial correlation
             return row[3]
 
         if "literature" in method.lower():
-            return __sort_6__
+            return __compare_lit_or_tiss_correlation_values_
 
         if "tissue" in method.lower():
-            return __sort_6__
+            return __compare_lit_or_tiss_correlation_values_
 
-        return __sort_3__
+        return __compare_partial_correlation_p_values__
 
     sorted_correlations = sorted(
         all_correlations, key=__make_sorter__(method))
@@ -717,7 +778,11 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
     return {
         "status": "success",
         "results": {
-            "primary_trait": primary_trait,
-            "control_traits": cntrl_traits,
-            "correlations": trait_list
+            "primary_trait": trait_for_output(primary_trait),
+            "control_traits": tuple(
+                trait_for_output(trait) for trait in cntrl_traits),
+            "correlations": tuple(
+                trait_for_output(trait) for trait in trait_list),
+            "dataset_type": target_dataset["type"],
+            "method": "spearman" if "spearman" in method.lower() else "pearson"
         }}
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index c50e148..a41e228 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -3,7 +3,7 @@ This module contains functions relating to specific trait dataset manipulation
 """
 import re
 from string import Template
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 from SPARQLWrapper import JSON, SPARQLWrapper
 from gn3.settings import SPARQL_ENDPOINT
 
@@ -297,7 +297,7 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn):
         **group
     }
 
-def sparql_query(query: str) -> Dict[str, Any]:
+def sparql_query(query: str) -> List[Dict[str, Any]]:
     """Run a SPARQL query and return the bound variables."""
     sparql = SPARQLWrapper(SPARQL_ENDPOINT)
     sparql.setQuery(query)
@@ -328,7 +328,7 @@ WHERE {
     OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
 }
 """,
-"""
+        """
 PREFIX gn: <http://genenetwork.org/>
 SELECT ?platform_name ?normalization_name ?species_name ?inbred_set_name ?tissue_name
 WHERE {
@@ -341,7 +341,7 @@ WHERE {
     OPTIONAL { ?dataset gn:datasetOfPlatform / gn:name ?platform_name } .
 }
 """,
-"""
+        """
 PREFIX gn: <http://genenetwork.org/>
 SELECT ?specifics ?summary ?about_cases ?about_tissue ?about_platform
        ?about_data_processing ?notes ?experiment_design ?contributors
@@ -362,8 +362,8 @@ WHERE {
     OPTIONAL { ?dataset gn:acknowledgment ?acknowledgment . }
   }
 """]
-    result = {'accession_id': accession_id,
-              'investigator': {}}
+    result: Dict[str, Any] = {'accession_id': accession_id,
+                              'investigator': {}}
     query_result = {}
     for query in queries:
         if sparql_result := sparql_query(Template(query).substitute(accession_id=accession_id)):