diff options
author | Frederick Muriuki Muriithi | 2021-12-18 11:36:07 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2021-12-24 14:36:14 +0300 |
commit | 4304e7298af6769110a251b21fca2f105bee4c06 (patch) | |
tree | 2f483c955803c2033ad7b7bc0594bc3313c8395c /gn3/computations | |
parent | 5abe0158daed9428484925b95bb4a8b2251adea2 (diff) | |
download | genenetwork3-4304e7298af6769110a251b21fca2f105bee4c06.tar.gz |
Reduce the total amount of data to be output
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* There is a lot of data that is not necessary in the final result. This
commit removes that data, retaining only data relevant for the display.
Diffstat (limited to 'gn3/computations')
-rw-r--r-- | gn3/computations/partial_correlations.py | 56 |
1 file changed, 53 insertions, 3 deletions
```diff
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 1c02533..9fb17f7 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -526,6 +526,54 @@ def tissue_correlation_by_list(
         } for trait in trait_list)
     return trait_list
 
+def trait_for_output(trait):
+    """
+    Process a trait for output.
+
+    Removes a lot of extraneous data from the trait, that is not needed for
+    the display of partial correlation results.
+    This function also removes all key-value pairs, for which the value is
+    `None`, because it is a waste of network resources to transmit the key-value
+    pair just to indicate it does not exist.
+    """
+    trait = {
+        "trait_type": trait["trait_type"],
+        "dataset_name": trait["db"]["dataset_name"],
+        "dataset_type": trait["db"]["dataset_type"],
+        "group": trait["db"]["group"],
+        "trait_fullname": trait["trait_fullname"],
+        "trait_name": trait["trait_name"],
+        "symbol": trait.get("symbol"),
+        "description": trait.get("description"),
+        "pre_publication_description": trait.get(
+            "pre_publication_description"),
+        "post_publication_description": trait.get(
+            "post_publication_description"),
+        "original_description": trait.get(
+            "original_description"),
+        "authors": trait.get("authors"),
+        "year": trait.get("year"),
+        "probe_target_description": trait.get(
+            "probe_target_description"),
+        "chr": trait.get("chr"),
+        "mb": trait.get("mb"),
+        "geneid": trait.get("geneid"),
+        "homologeneid": trait.get("homologeneid"),
+        "noverlap": trait.get("noverlap"),
+        "partial_corr": trait.get("partial_corr"),
+        "partial_corr_p_value": trait.get("partial_corr_p_value"),
+        "corr": trait.get("corr"),
+        "corr_p_value": trait.get("corr_p_value"),
+        "rank_order": trait.get("rank_order"),
+        "delta": (
+            None if trait.get("partial_corr") is None
+            else (trait.get("partial_corr") - trait.get("corr"))),
+        "l_corr": trait.get("l_corr"),
+        "tissue_corr": trait.get("tissue_corr"),
+        "tissue_p_value": trait.get("tissue_p_value")
+    }
+    return {key: val for key, val in trait.items() if val is not None}
+
 def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
         conn: Any, primary_trait_name: str,
         control_trait_names: Tuple[str, ...], method: str,
@@ -717,8 +765,10 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
     return {
         "status": "success",
         "results": {
-            "primary_trait": primary_trait,
-            "control_traits": cntrl_traits,
-            "correlations": trait_list,
+            "primary_trait": trait_for_output(primary_trait),
+            "control_traits": tuple(
+                trait_for_output(trait) for trait in cntrl_traits),
+            "correlations": tuple(
+                trait_for_output(trait) for trait in trait_list),
             "dataset_type": target_dataset["type"]
         }}
```