From 5abe0158daed9428484925b95bb4a8b2251adea2 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Sat, 18 Dec 2021 08:09:04 +0300 Subject: Add dataset type to the results Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * The dataset type is relevant for the display of the data, therefore, this commit presents the dataset type as part of the results. --- gn3/computations/partial_correlations.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 13c411a..1c02533 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -717,7 +717,8 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] return { "status": "success", "results": { - "primary_trait": primary_trait, - "control_traits": cntrl_traits, - "correlations": trait_list + "primary_trait": primary_trait, + "control_traits": cntrl_traits, + "correlations": trait_list, + "dataset_type": target_dataset["type"] }} -- cgit v1.2.3 From 4304e7298af6769110a251b21fca2f105bee4c06 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Sat, 18 Dec 2021 11:36:07 +0300 Subject: Reduce the total amount of data to be output Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * There is a lot of data that is not necessary in the final result. This commit removes that data, retaining only data relevant for the display. --- gn3/computations/partial_correlations.py | 56 ++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 1c02533..9fb17f7 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -526,6 +526,54 @@ def tissue_correlation_by_list( } for trait in trait_list) return trait_list +def trait_for_output(trait): + """ + Process a trait for output. + + Removes a lot of extraneous data from the trait, that is not needed for + the display of partial correlation results. + This function also removes all key-value pairs, for which the value is + `None`, because it is a waste of network resources to transmit the key-value + pair just to indicate it does not exist. + """ + trait = { + "trait_type": trait["trait_type"], + "dataset_name": trait["db"]["dataset_name"], + "dataset_type": trait["db"]["dataset_type"], + "group": trait["db"]["group"], + "trait_fullname": trait["trait_fullname"], + "trait_name": trait["trait_name"], + "symbol": trait.get("symbol"), + "description": trait.get("description"), + "pre_publication_description": trait.get( + "pre_publication_description"), + "post_publication_description": trait.get( + "post_publication_description"), + "original_description": trait.get( + "original_description"), + "authors": trait.get("authors"), + "year": trait.get("year"), + "probe_target_description": trait.get( + "probe_target_description"), + "chr": trait.get("chr"), + "mb": trait.get("mb"), + "geneid": trait.get("geneid"), + "homologeneid": trait.get("homologeneid"), + "noverlap": trait.get("noverlap"), + "partial_corr": trait.get("partial_corr"), + "partial_corr_p_value": trait.get("partial_corr_p_value"), + "corr": trait.get("corr"), + "corr_p_value": trait.get("corr_p_value"), + "rank_order": trait.get("rank_order"), + "delta": ( + None if trait.get("partial_corr") is None + else (trait.get("partial_corr") - trait.get("corr"))), + "l_corr": trait.get("l_corr"), + "tissue_corr": trait.get("tissue_corr"), + "tissue_p_value": trait.get("tissue_p_value") + } + return {key: val for key, val in trait.items() if val is not None} + def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] conn: Any, primary_trait_name: str, control_trait_names: Tuple[str, ...], method: str, @@ -717,8 +765,10 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] return { "status": "success", "results": { - "primary_trait": primary_trait, - "control_traits": cntrl_traits, - "correlations": trait_list, + "primary_trait": trait_for_output(primary_trait), + "control_traits": tuple( + trait_for_output(trait) for trait in cntrl_traits), + "correlations": tuple( + trait_for_output(trait) for trait in trait_list), "dataset_type": target_dataset["type"] }} -- cgit v1.2.3 From 0508fc422c033cfff8bbea118f85282212d236e4 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 20 Dec 2021 09:38:17 +0300 Subject: Return the correlation method used Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Return the correlation method used --- gn3/computations/partial_correlations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gn3/computations') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 9fb17f7..dbcbe29 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -770,5 +770,6 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] trait_for_output(trait) for trait in cntrl_traits), "correlations": tuple( trait_for_output(trait) for trait in trait_list), - "dataset_type": target_dataset["type"] + "dataset_type": target_dataset["type"], + "method": "spearman" if "spearman" in method.lower() else "pearson" }} -- cgit v1.2.3 From 2d3b6eae6953d5e4b00f21b5ffd683271d0f76bc Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 21 Dec 2021 13:04:51 +0300 Subject: Fix sorting Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Update the sorting algorithm, for literature and tissue correlations so that it sorts the results by the correlation value first then by the p-value next. --- gn3/computations/partial_correlations.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index dbcbe29..1e4a646 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -717,19 +717,30 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] def __make_sorter__(method): - def __sort_6__(row): - return row[6] - - def __sort_3__(row): + def __compare_lit_or_tiss_correlation_values_(row): + # Index Content + # 0 trait name + # 1 N + # 2 partial correlation coefficient + # 3 p value of partial correlation + # 6 literature/tissue correlation value + return (row[6], row[3]) + + def __compare_partial_correlation_p_values__(row): + # Index Content + # 0 trait name + # 1 partial correlation coefficient + # 2 N + # 3 p value of partial correlation return row[3] if "literature" in method.lower(): - return __sort_6__ + return __compare_lit_or_tiss_correlation_values_ if "tissue" in method.lower(): - return __sort_6__ + return __compare_lit_or_tiss_correlation_values_ - return __sort_3__ + return __compare_partial_correlation_p_values__ sorted_correlations = sorted( all_correlations, key=__make_sorter__(method)) -- cgit v1.2.3 From 672c1f2d43961feb59d9549557cb10c4f0b19dcf Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 22 Dec 2021 09:13:32 +0300 Subject: Fix linting errors --- gn3/authentication.py | 5 +++++ gn3/computations/correlations.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'gn3/computations') diff --git a/gn3/authentication.py b/gn3/authentication.py index a6372c1..4aedacd 100644 --- a/gn3/authentication.py +++ b/gn3/authentication.py @@ -163,3 +163,8 @@ def create_group(conn: Redis, group_name: Optional[str], } conn.hset("groups", group_id, json.dumps(group)) return group + # This might break stuff, but it fixes the linting error regarding + # inconsistent return types. + # @BonfaceKilz please review this and replace with appropriate return and + # remove these comments. + return None diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index d38946e..345b8d7 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -79,7 +79,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, zip(*list(normalize_values(trait_vals, target_samples_vals)))) num_overlap = len(normalized_traits_vals) except ValueError: - return + return None if num_overlap > 5: -- cgit v1.2.3 From 784447b17d85a618005ac9acfc57f5b7ef8f5169 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 22 Dec 2021 09:14:04 +0300 Subject: Fix typing errors --- gn3/computations/correlations.py | 3 ++- gn3/computations/partial_correlations.py | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 345b8d7..1b4b3a4 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -7,6 +7,7 @@ from typing import List from typing import Tuple from typing import Optional from typing import Callable +from typing import Generator import scipy.stats import pingouin as pg @@ -106,7 +107,7 @@ package :not packaged in guix def filter_shared_sample_keys(this_samplelist, - target_samplelist) -> Tuple[List, List]: + target_samplelist) -> Generator: """Given primary and target sample-list for two base and target trait select filter the values using the shared keys diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 1e4a646..984c15a 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -217,7 +217,7 @@ def good_dataset_samples_indexes( def partial_correlations_fast(# pylint: disable=[R0913, R0914] samples, primary_vals, control_vals, database_filename, fetched_correlations, method: str, correlation_type: str) -> Tuple[ - float, Tuple[float, ...]]: + int, Tuple[float, ...]]: """ Computes partial correlation coefficients using data from a CSV file. @@ -350,7 +350,9 @@ def compute_partial( def partial_correlations_normal(# pylint: disable=R0913 primary_vals, control_vals, input_trait_gene_id, trait_database, data_start_pos: int, db_type: str, method: str) -> Tuple[ - float, Tuple[float, ...]]: + int, Tuple[Union[ + Tuple[str, int, float, float, float, float], None], + ...]]:#Tuple[float, ...] """ Computes the correlation coefficients. @@ -485,7 +487,7 @@ def literature_correlation_by_list( def tissue_correlation_by_list( conn: Any, primary_trait_symbol: str, tissue_probeset_freeze_id: int, - method: str, trait_list: Tuple[dict]) -> Tuple[dict]: + method: str, trait_list: Tuple[dict]) -> Tuple[dict, ...]: """ This is a migration of the `web.webqtl.correlation.CorrelationPage.getTissueCorrelationByList` @@ -508,7 +510,7 @@ def tissue_correlation_by_list( primary_trait_value = prim_trait_symbol_value_dict[ primary_trait_symbol.lower()] gene_symbol_list = tuple( - trait for trait in trait_list if "symbol" in trait.keys()) + trait["symbol"] for trait in trait_list if "symbol" in trait.keys()) symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( gene_symbol_list, tissue_probeset_freeze_id, conn) return tuple( -- cgit v1.2.3