about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-12-30 10:21:46 +0300
committer: Frederick Muriuki Muriithi, 2022-01-10 08:15:19 +0300
commit: 389fe9fee11760d8d046983bf27c82c019fd6d97 (patch)
tree: b66d1a9c34165f7cc5f953e0d968c10e4afcf10e
parent: 032b259a3088402d90ca6d24bb987d5fb6ae1a57 (diff)
download: genenetwork3-389fe9fee11760d8d046983bf27c82c019fd6d97.tar.gz
Convert NaN to None
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
Comment:
https://github.com/genenetwork/genenetwork3/pull/67#issuecomment-1000828159

* Convert NaN values to None to avoid possible bugs with the string replace
  method used before.
-rw-r--r--  gn3/api/correlation.py  10
-rw-r--r--  gn3/computations/partial_correlations.py  32
2 files changed, 28 insertions, 14 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index 1caf31f..b46855f 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -98,10 +98,10 @@ def partial_correlation():
         Class to encode output into JSON, for objects which the default
         json.JSONEncoder class does not have default encoding for.
         """
-        def default(self, obj):
-            if isinstance(obj, bytes):
-                return str(obj, encoding="utf-8")
-            return json.JSONEncoder.default(self, obj)
+        def default(self, o):
+            if isinstance(o, bytes):
+                return str(o, encoding="utf-8")
+            return json.JSONEncoder.default(self, o)
 
     args = request.get_json()
     conn, _cursor_object = database_connector()
@@ -110,7 +110,7 @@ def partial_correlation():
         tuple(trait_fullname(trait) for trait in args["control_traits"]),
         args["method"], int(args["criteria"]), args["target_db"])
     response = make_response(
-        json.dumps(corr_results, cls=OutputEncoder).replace(": NaN", ": null"),
+        json.dumps(corr_results, cls=OutputEncoder),
         400 if "error" in corr_results.keys() else 200)
     response.headers["Content-Type"] = "application/json"
     return response
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 0d4394b..e6056d5 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -9,6 +9,7 @@ import math
 from functools import reduce, partial
 from typing import Any, Tuple, Union, Sequence
 
+import numpy
 import pandas
 import pingouin
 from scipy.stats import pearsonr, spearmanr
@@ -538,6 +539,11 @@ def trait_for_output(trait):
     `None`, because it is a waste of network resources to transmit the key-value
     pair just to indicate it does not exist.
     """
+    def __nan_to_none__(val):
+        if math.isnan(val) or numpy.isnan(val):
+            return None
+        return val
+
     trait = {
         "trait_type": trait["db"]["dataset_type"],
         "dataset_name": trait["db"]["dataset_name"],
@@ -562,19 +568,27 @@ def trait_for_output(trait):
         "geneid": trait.get("geneid"),
         "homologeneid": trait.get("homologeneid"),
         "noverlap": trait.get("noverlap"),
-        "partial_corr": trait.get("partial_corr"),
-        "partial_corr_p_value": trait.get("partial_corr_p_value"),
-        "corr": trait.get("corr"),
-        "corr_p_value": trait.get("corr_p_value"),
-        "rank_order": trait.get("rank_order"),
+        "partial_corr": __nan_to_none__(trait.get("partial_corr")),
+        "partial_corr_p_value": __nan_to_none__(
+            trait.get("partial_corr_p_value")),
+        "corr": __nan_to_none__(trait.get("corr")),
+        "corr_p_value": __nan_to_none__(trait.get("corr_p_value")),
+        "rank_order": __nan_to_none__(trait.get("rank_order")),
         "delta": (
             None if trait.get("partial_corr") is None
             else (trait.get("partial_corr") - trait.get("corr"))),
-        "l_corr":  trait.get("l_corr"),
-        "tissue_corr": trait.get("tissue_corr"),
-        "tissue_p_value": trait.get("tissue_p_value")
+        "l_corr": __nan_to_none__(trait.get("l_corr")),
+        "tissue_corr": __nan_to_none__(trait.get("tissue_corr")),
+        "tissue_p_value": __nan_to_none__(trait.get("tissue_p_value"))
     }
-    return {key: val for key, val in trait.items() if val is not None}
+    return {
+        key: val
+        for key, val in trait.items()
+        if (
+            val is not None
+            or key in (
+                "partial_corr_p_value", "corr", "corr_p_value", "rank_order",
+                "delta", "l_corr", "tissue_corr", "tissue_p_value"))}
 
 def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
         conn: Any, primary_trait_name: str,