about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gn3/computations/partial_correlations.py 22
-rw-r--r-- tests/unit/computations/test_dictify_by_samples.py 108
2 files changed, 126 insertions, 4 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index f1757f9..85e3c11 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -6,6 +6,7 @@ GeneNetwork1.
"""
import math
+import warnings
from functools import reduce, partial
from typing import Any, Tuple, Union, Sequence
@@ -78,11 +79,24 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
This implementation extracts code that will find common use, and that will
find use in more than one place.
"""
def __build_key_value_pairs__(
        sample: str, value: Union[float, None],
        variance: Union[float, None]) -> Union[
            Tuple[str, dict[str, Union[str, float, None]]], None]:
    """
    Build the `(key, value)` pair for a single sample.

    The sample name is stripped of surrounding whitespace and used both as
    the key and as the `sample_name` entry of the value dict.

    Returns `None`, after emitting a `RuntimeWarning`, when the stripped
    sample name is an empty string, so that the caller can filter such
    samples out before building the final dict.
    """
    smp = sample.strip()
    if smp == "":
        warnings.warn(
            "Empty strings for sample names is not allowed. Returning None",
            category=RuntimeWarning)
        return None
    return (smp, {"sample_name": smp, "value": value, "variance": variance})
+
return tuple(
- {
- sample: {"sample_name": sample, "value": val, "variance": var}
- for sample, val, var in zip(*trait_line)
- } for trait_line in zip(*(samples_vals_vars[0:3])))
+ dict(item for item in
+ (__build_key_value_pairs__(sample, val, var)
+ for sample, val, var in zip(*trait_line))
+ if item is not None)
+ for trait_line in zip(*(samples_vals_vars[0:3])))
def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
"""
diff --git a/tests/unit/computations/test_dictify_by_samples.py b/tests/unit/computations/test_dictify_by_samples.py
new file mode 100644
index 0000000..decc095
--- /dev/null
+++ b/tests/unit/computations/test_dictify_by_samples.py
@@ -0,0 +1,108 @@
+from math import isnan
+import pytest
+from collections.abc import Sequence
+from hypothesis import given, strategies as st
+from gn3.computations.partial_correlations import dictify_by_samples
+
+
def check_keys(samples, the_dict):
    """Return True when every key of `the_dict` is found in `samples`."""
    for key in the_dict:
        if key not in samples:
            return False
    return True
+
+
def same(val1, val2):
    """
    Check that values are similar.

    In Python3 `float('nan') == float('nan')` always returns False. This
    function thus compares similarity rather than direct equality for NaN
    values.

    `math.isnan(None)` raises a `TypeError`, so `None` is handled before any
    call to `isnan`: two `None` values are similar, while `None` is never
    similar to a non-`None` value. Values that `isnan` cannot handle at all
    fall back to a plain equality comparison.
    """
    if val1 is None or val2 is None:
        return val1 is None and val2 is None
    try:
        both_nan = isnan(val1) and isnan(val2)
    except (TypeError, OverflowError):
        # Not something `isnan` understands: only `==` can decide.
        both_nan = False
    return both_nan or val1 == val2
+
def check_dict_keys_and_values(sample, value, variance, the_dict):
    """
    Verify a single per-sample dict.

    The dict passes when:
    - none of its keys fall outside `sample_name`, `value` and `variance`,
    - its `sample_name` equals `sample`, and
    - its `value` and `variance` are similar (as judged by `same`) to the
      given `value` and `variance`.
    """
    allowed = ("sample_name", "value", "variance")
    if any(key not in allowed for key in the_dict):
        return False
    if not the_dict["sample_name"] == sample:
        return False
    return (
        same(the_dict["value"], value) and
        same(the_dict["variance"], variance))
+
def check_values(samples, values, variances, row):
    """
    Check that the values in each dict in `row` are made up from the values in
    the `samples`, `values`, and `variances` sequences.

    Sample names are stripped before the lookup and names that are empty
    after stripping are skipped, mirroring how `row` is keyed. When a sample
    name occurs more than once only its last occurrence is checked, because
    building a dict from key/value pairs keeps the last value for a repeated
    key.
    """
    expected = {
        smp.strip(): (val, var)
        for smp, val, var in zip(samples, values, variances)
        if smp.strip() != ""}
    return all(
        check_dict_keys_and_values(smp, val, var, row[smp])
        for smp, (val, var) in expected.items())
+
# Lists of sample names built from 1-15 character texts that are stripped of
# surrounding whitespace (a name may still end up empty after stripping).
non_empty_samples = st.lists(
    st.text(min_size=1, max_size=15).map(str.strip))

# A single sample name made up of whitespace characters only.
empty_samples = st.text(
    alphabet=" \t\n\r\f\v", min_size=1, max_size=15
).filter(lambda name: not name.strip())

# Per-trait values, variances (which may be missing) and unrelated extra data.
values = st.lists(st.floats())
variances = st.lists(st.one_of(st.none(), st.floats()))
other = st.lists(st.integers())
+
@pytest.mark.unit_test
@given(svv=st.tuples(
    st.lists(non_empty_samples),
    st.lists(values),
    st.lists(variances),
    st.lists(other)))
def test_dictifify_by_samples_with_nonempty_samples_strings(svv):
    """
    Test for `dictify_by_samples`.

    Given a sequence of sequences of sequences

    Check for the following properties:
    - Returns a sequence of dicts
    - The keys of the dict at each index are sample names found at the same
      index of the zeroth sequence in the top-level sequence
    - The values of each dict are built from the sample names, values and
      variances found at the same index of the first three sequences
    """
    res = dictify_by_samples(svv)
    # One assertion per property so a failure points at the broken property.
    assert isinstance(res, Sequence)
    assert all(isinstance(elt, dict) for elt in res)
    for idx, row in enumerate(res):
        assert check_keys(svv[0][idx], row)
        assert check_values(svv[0][idx], svv[1][idx], svv[2][idx], row)
+
@pytest.mark.unit_test
@given(svv=st.tuples(
    st.lists(
        st.lists(empty_samples, min_size=1),
        min_size=1),
    st.lists(st.lists(st.floats(), min_size=1), min_size=1),
    st.lists(
        st.lists(st.one_of(st.none(), st.floats()), min_size=1), min_size=1),
    st.lists(st.lists(st.integers(), min_size=1), min_size=1)))
def test_dictify_by_samples_with_empty_samples_strings(svv):
    """
    Test that `dictify_by_samples` warns the user about providing sample names
    that are just empty strings, and that such samples are left out of the
    resulting dicts.
    """
    with pytest.warns(RuntimeWarning):
        res = dictify_by_samples(svv)
    # Every generated sample name is whitespace-only, so nothing may survive.
    assert all(row == {} for row in res)