about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gn3/computations/partial_correlations.py 22
-rw-r--r-- tests/unit/computations/test_dictify_by_samples.py 108
2 files changed, 126 insertions, 4 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index f1757f9..85e3c11 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -6,6 +6,7 @@ GeneNetwork1.
 """
 
 import math
+import warnings
 from functools import reduce, partial
 from typing import Any, Tuple, Union, Sequence
 
@@ -78,11 +79,24 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
     This implementation extracts code that will find common use, and that will
     find use in more than one place.
     """
+    def __build_key_value_pairs__(
+            sample: str, value: Union[float, None], variance: Union[float, None]
+    ) -> Union[Tuple[str, dict[str, Union[str, float, None]]], None]:
+        """Return a `(name, data)` item, or None (and warn) for a blank name."""
+        smp = sample.strip()
+        if smp == "":
+            warnings.warn(
+                "Empty strings for sample names is not allowed. Returning None",
+                category=RuntimeWarning)
+            return None
+        return (smp, {"sample_name": smp, "value": value, "variance": variance})
+
     return tuple(
-        {
-            sample: {"sample_name": sample, "value": val, "variance": var}
-            for sample, val, var in zip(*trait_line)
-        } for trait_line in zip(*(samples_vals_vars[0:3])))
+        dict(item for item in
+             (__build_key_value_pairs__(sample, val, var)
+              for sample, val, var in zip(*trait_line))
+             if item is not None)
+        for trait_line in zip(*(samples_vals_vars[0:3])))
 
 def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
     """
diff --git a/tests/unit/computations/test_dictify_by_samples.py b/tests/unit/computations/test_dictify_by_samples.py
new file mode 100644
index 0000000..decc095
--- /dev/null
+++ b/tests/unit/computations/test_dictify_by_samples.py
@@ -0,0 +1,108 @@
+from math import isnan
+import pytest
+from collections.abc import Sequence
+from hypothesis import given, strategies as st
+from gn3.computations.partial_correlations import dictify_by_samples
+
+
+def check_keys(samples, the_dict):
+    """Return True when every key of `the_dict` is a member of `samples`."""
+    missing = [name for name in the_dict if name not in samples]
+    return not missing
+
+
+def same(val1, val2):
+    """
+    Check that values are similar.
+
+    In Python3 `float('nan') == float('nan')` always returns False. This
+    function thus treats two NaN values as being the same value, and uses
+    direct equality for everything else.
+
+    `math.isnan(None)` raises a TypeError, so None is handled before `isnan`
+    is called: None is only the same as None, never the same as a number
+    (NaN included).
+    """
+    if val1 is None or val2 is None:
+        return val1 is None and val2 is None
+    return (
+        (isnan(val1) and isnan(val2)) or (val1 == val2))
+
+def check_dict_keys_and_values(sample, value, variance, the_dict):
+    """
+    Verify a single per-sample dict:
+    - its keys are drawn only from `sample_name`, `value` and `variance`
+    - its entries match the given `sample`, `value` and `variance`
+    """
+    allowed = ("sample_name", "value", "variance")
+    if not all(key in allowed for key in the_dict):
+        return False
+    if the_dict["sample_name"] != sample:
+        return False
+    return (same(the_dict["value"], value) and
+            same(the_dict["variance"], variance))
+
+def check_values(samples, values, variances, row):
+    """
+    Verify that every entry of `row` was built from the matching items of the
+    `samples`, `values`, and `variances` sequences. Items whose sample name is
+    an empty string are skipped.
+    """
+    triples = (
+        item for item in zip(samples, values, variances) if item[0] != "")
+    return all(
+        check_dict_keys_and_values(*item, row[item[0]]) for item in triples)
+
+# Sample names are unique within a trait: a duplicated name would collapse into
+# one dict key, and `check_values` would then compare against the wrong item.
+non_empty_samples = st.lists(
+    st.text(min_size=1, max_size=15).map(str.strip), unique=True)
+empty_samples = st.text(alphabet=" \t\n\r\f\v", min_size=1, max_size=15).filter(
+    lambda s: len(s.strip()) == 0)
+values = st.lists(st.floats())
+variances = st.lists(st.one_of(st.none(), st.floats()))
+other = st.lists(st.integers())
+
+@pytest.mark.unit_test
+@given(svv=st.tuples(
+    st.lists(non_empty_samples),
+    st.lists(values),
+    st.lists(variances),
+    st.lists(other)))
+def test_dictifify_by_samples_with_nonempty_samples_strings(svv):
+    """
+    Test for `dictify_by_samples`.
+
+    Given a sequence of sequences of sequences, check that:
+    - The result is a sequence of dicts
+    - Each dict's keys come from the sample names at the same index in the
+      zeroth sequence of the top-level sequence
+    - Each dict's values come from the matching values and variances
+    """
+    res = dictify_by_samples(svv)
+    # Separate asserts, so that a failure points at the property that broke.
+    assert isinstance(res, Sequence)
+    assert all(isinstance(elt, dict) for elt in res)
+    assert all(
+        check_keys(svv[0][idx], row)
+        for idx, row in enumerate(res))
+    assert all(
+        check_values(svv[0][idx], svv[1][idx], svv[2][idx], row)
+        for idx, row in enumerate(res))
+
+@pytest.mark.unit_test
+@given(svv=st.tuples(
+    st.lists(st.lists(empty_samples, min_size=1), min_size=1),
+    st.lists(st.lists(st.floats(), min_size=1), min_size=1),
+    st.lists(
+        st.lists(st.one_of(st.none(), st.floats()), min_size=1), min_size=1),
+    st.lists(st.lists(st.integers(), min_size=1), min_size=1)))
+def test_dictify_by_samples_with_empty_samples_strings(svv):
+    """
+    Check that `dictify_by_samples` emits a `RuntimeWarning` when the sample
+    names it is given are whitespace-only strings.
+    Every list is generated with at least one element, so that there is always
+    a sample name for `dictify_by_samples` to reject.
+    """
+    with pytest.warns(RuntimeWarning):
+        dictify_by_samples(svv)