aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-12-18 11:36:07 +0300
committerFrederick Muriuki Muriithi2021-12-24 14:36:14 +0300
commit4304e7298af6769110a251b21fca2f105bee4c06 (patch)
tree2f483c955803c2033ad7b7bc0594bc3313c8395c
parent5abe0158daed9428484925b95bb4a8b2251adea2 (diff)
downloadgenenetwork3-4304e7298af6769110a251b21fca2f105bee4c06.tar.gz
Reduce the total amount of data to be output
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* The final result contains a lot of data that is not necessary. This commit removes that data, retaining only the data relevant for the display.
-rw-r--r--gn3/computations/partial_correlations.py56
1 files changed, 53 insertions, 3 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 1c02533..9fb17f7 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -526,6 +526,54 @@ def tissue_correlation_by_list(
} for trait in trait_list)
return trait_list
def trait_for_output(trait):
    """
    Process a trait for output.

    Builds a new, smaller dictionary from `trait`, keeping only the fields
    needed for the display of partial correlation results. The input
    dictionary is not modified.

    `trait` must contain the keys "trait_type", "trait_fullname",
    "trait_name" and "db", where "db" is itself a mapping with the keys
    "dataset_name", "dataset_type" and "group"; a `KeyError` is raised if any
    of these is missing. All other fields are optional.

    A "delta" field (`partial_corr - corr`) is added only when both
    "partial_corr" and "corr" are present and not `None`.

    All key-value pairs for which the value is `None` are dropped from the
    result, because it is a waste of network resources to transmit a key-value
    pair just to indicate that the value does not exist.
    """
    partial_corr = trait.get("partial_corr")
    corr = trait.get("corr")
    # The difference is only meaningful when both correlations exist; guarding
    # on both avoids a `TypeError` from subtracting `None`.
    delta = (
        None if partial_corr is None or corr is None
        else partial_corr - corr)

    output = {
        "trait_type": trait["trait_type"],
        "dataset_name": trait["db"]["dataset_name"],
        "dataset_type": trait["db"]["dataset_type"],
        "group": trait["db"]["group"],
        "trait_fullname": trait["trait_fullname"],
        "trait_name": trait["trait_name"],
        "symbol": trait.get("symbol"),
        "description": trait.get("description"),
        "pre_publication_description": trait.get(
            "pre_publication_description"),
        "post_publication_description": trait.get(
            "post_publication_description"),
        "original_description": trait.get("original_description"),
        "authors": trait.get("authors"),
        "year": trait.get("year"),
        "probe_target_description": trait.get("probe_target_description"),
        "chr": trait.get("chr"),
        "mb": trait.get("mb"),
        "geneid": trait.get("geneid"),
        "homologeneid": trait.get("homologeneid"),
        "noverlap": trait.get("noverlap"),
        "partial_corr": partial_corr,
        "partial_corr_p_value": trait.get("partial_corr_p_value"),
        "corr": corr,
        "corr_p_value": trait.get("corr_p_value"),
        "rank_order": trait.get("rank_order"),
        "delta": delta,
        "l_corr": trait.get("l_corr"),
        "tissue_corr": trait.get("tissue_corr"),
        "tissue_p_value": trait.get("tissue_p_value")
    }
    # Compare against `None` explicitly so that legitimate falsy values
    # (e.g. a correlation of 0.0) are retained.
    return {key: val for key, val in output.items() if val is not None}
+
def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
conn: Any, primary_trait_name: str,
control_trait_names: Tuple[str, ...], method: str,
@@ -717,8 +765,10 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
return {
"status": "success",
"results": {
- "primary_trait": primary_trait,
- "control_traits": cntrl_traits,
- "correlations": trait_list,
+ "primary_trait": trait_for_output(primary_trait),
+ "control_traits": tuple(
+ trait_for_output(trait) for trait in cntrl_traits),
+ "correlations": tuple(
+ trait_for_output(trait) for trait in trait_list),
"dataset_type": target_dataset["type"]
}}