From 389fe9fee11760d8d046983bf27c82c019fd6d97 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 30 Dec 2021 10:21:46 +0300 Subject: Convert NaN to None Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi Comment: https://github.com/genenetwork/genenetwork3/pull/67#issuecomment-1000828159 * Convert NaN values to None to avoid possible bugs with the string replace method used before. --- gn3/api/correlation.py | 10 +++++----- gn3/computations/partial_correlations.py | 32 +++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index 1caf31f..b46855f 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -98,10 +98,10 @@ def partial_correlation(): Class to encode output into JSON, for objects which the default json.JSONEncoder class does not have default encoding for. """ - def default(self, obj): - if isinstance(obj, bytes): - return str(obj, encoding="utf-8") - return json.JSONEncoder.default(self, obj) + def default(self, o): + if isinstance(o, bytes): + return str(o, encoding="utf-8") + return json.JSONEncoder.default(self, o) args = request.get_json() conn, _cursor_object = database_connector() @@ -110,7 +110,7 @@ def partial_correlation(): tuple(trait_fullname(trait) for trait in args["control_traits"]), args["method"], int(args["criteria"]), args["target_db"]) response = make_response( - json.dumps(corr_results, cls=OutputEncoder).replace(": NaN", ": null"), + json.dumps(corr_results, cls=OutputEncoder), 400 if "error" in corr_results.keys() else 200) response.headers["Content-Type"] = "application/json" return response diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 0d4394b..e6056d5 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -9,6 +9,7 @@ import math from functools import reduce, partial from typing import Any, Tuple, Union, Sequence +import numpy import pandas import pingouin from scipy.stats import pearsonr, spearmanr @@ -538,6 +539,11 @@ def trait_for_output(trait): `None`, because it is a waste of network resources to transmit the key-value pair just to indicate it does not exist. """ + def __nan_to_none__(val): + if math.isnan(val) or numpy.isnan(val): + return None + return val + trait = { "trait_type": trait["db"]["dataset_type"], "dataset_name": trait["db"]["dataset_name"], @@ -562,19 +568,27 @@ def trait_for_output(trait): "geneid": trait.get("geneid"), "homologeneid": trait.get("homologeneid"), "noverlap": trait.get("noverlap"), - "partial_corr": trait.get("partial_corr"), - "partial_corr_p_value": trait.get("partial_corr_p_value"), - "corr": trait.get("corr"), - "corr_p_value": trait.get("corr_p_value"), - "rank_order": trait.get("rank_order"), + "partial_corr": __nan_to_none__(trait.get("partial_corr")), + "partial_corr_p_value": __nan_to_none__( + trait.get("partial_corr_p_value")), + "corr": __nan_to_none__(trait.get("corr")), + "corr_p_value": __nan_to_none__(trait.get("corr_p_value")), + "rank_order": __nan_to_none__(trait.get("rank_order")), "delta": ( None if trait.get("partial_corr") is None else (trait.get("partial_corr") - trait.get("corr"))), - "l_corr": trait.get("l_corr"), - "tissue_corr": trait.get("tissue_corr"), - "tissue_p_value": trait.get("tissue_p_value") + "l_corr": __nan_to_none__(trait.get("l_corr")), + "tissue_corr": __nan_to_none__(trait.get("tissue_corr")), + "tissue_p_value": __nan_to_none__(trait.get("tissue_p_value")) } - return {key: val for key, val in trait.items() if val is not None} + return { + key: val + for key, val in trait.items() + if ( + val is not None + or key in ( + "partial_corr_p_value", "corr", "corr_p_value", "rank_order", + "delta", "l_corr", "tissue_corr", "tissue_p_value"))} def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] conn: Any, primary_trait_name: str, -- cgit v1.2.3