From 67f517aa0f44f55dc691ffd791bf22ef7af0b02c Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 10 Feb 2022 11:36:10 +0300
Subject: Add property tests for `dictify_by_samples`

Add property tests using pytest and hypothesis to test that the expected
properties hold for the
`gn3.computations.partial_correlations.dictify_by_samples` function.
---
Notes (review; not part of the commit message):
    The line structure of this patch was restored from a whitespace-collapsed
    copy. Line breaks agree with the hunk line counts below; the exact
    indentation is inferred from Python syntax and should be confirmed against
    the repository.

    Observations on the patched code, left unchanged here:
    - `__build_key_value_pairs__` is annotated as returning a dict, but it
      returns a `(name, dict)` tuple, or None for an empty sample name.
    - In the test helper `same()`, a call such as `same(None, 1.0)` raises
      TypeError (from `isnan(None)`) instead of returning False; only the
      case where both values are None is guarded.
    - `test_dictifify_by_samples_with_nonempty_samples_strings` (sic) carries
      no `@pytest.mark.unit_test` marker, unlike the second test.

 gn3/computations/partial_correlations.py           |  22 ++++-
 tests/unit/computations/test_dictify_by_samples.py | 108 +++++++++++++++++++++
 2 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 tests/unit/computations/test_dictify_by_samples.py

diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index f1757f9..85e3c11 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -6,6 +6,7 @@ GeneNetwork1.
 """
 
 import math
+import warnings
 from functools import reduce, partial
 from typing import Any, Tuple, Union, Sequence
 
@@ -78,11 +79,24 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
     This implementation extracts code that will find common use, and that will
     find use in more than one place.
     """
+    def __build_key_value_pairs__(
+            sample: str, value: Union[float, None],
+            variance: Union[float, None]) -> dict[
+                str, dict[str, Union[str, float, None]]]:
+        smp = sample.strip()
+        if smp == "":
+            warnings.warn(
+                "Empty strings for sample names is not allowed. Returning None",
+                category=RuntimeWarning)
+            return None
+        return (smp, {"sample_name": smp, "value": value, "variance": variance})
+
     return tuple(
-        {
-            sample: {"sample_name": sample, "value": val, "variance": var}
-            for sample, val, var in zip(*trait_line)
-        } for trait_line in zip(*(samples_vals_vars[0:3])))
+        dict(item for item in
+             (__build_key_value_pairs__(sample, val, var)
+              for sample, val, var in zip(*trait_line))
+             if item is not None)
+        for trait_line in zip(*(samples_vals_vars[0:3])))
 
 def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
     """
diff --git a/tests/unit/computations/test_dictify_by_samples.py b/tests/unit/computations/test_dictify_by_samples.py
new file mode 100644
index 0000000..decc095
--- /dev/null
+++ b/tests/unit/computations/test_dictify_by_samples.py
@@ -0,0 +1,108 @@
+from math import isnan
+import pytest
+from collections.abc import Sequence
+from hypothesis import given, strategies as st
+from gn3.computations.partial_correlations import dictify_by_samples
+
+
+def check_keys(samples, the_dict):
+    """Check that all the keys in `the_dict` are strings in `samples.`"""
+    return all(
+        (key in samples) for key in the_dict.keys())
+
+
+def same(val1, val2):
+    """
+    Check that values are similar.
+
+    In Python3 `float('nan') == float('nan')` always returns False. This
+    function thus, compares similarity rather than direct equality for NaN
+    values.
+
+    `Math.isnan(None)` would throw an error, thus this function takes advantage
+    of the `or` operation's short-circuit to avoid this failure in the case
+    where both values are NoneType values.
+    """
+    return (
+        (val1 is None and val2 is None) or
+        (isnan(val1) and isnan(val2)) or
+        (val1 == val2))
+
+def check_dict_keys_and_values(sample, value, variance, the_dict):
+    """
+    Check the following properties for each dict:
+    - has only `sample_name`, `value` and `variance` as the keys
+    - The values in the dict are the same ones in `sample`, `value` and
+      `variance`.
+    """
+    return (
+        all((key in ("sample_name", "value", "variance"))
+            for key in the_dict.keys()) and
+        the_dict["sample_name"] == sample and
+        same(the_dict["value"], value) and
+        same(the_dict["variance"], variance))
+
+def check_values(samples, values, variances, row):
+    """
+    Check that the values in each dict in `row` are made up from the values in
+    the `samples`, `values`, and `variances` sequences, skipping all values in
+    the `row` for which the sample name is an empty string.
+    """
+    return all(
+        check_dict_keys_and_values(smp, val, var, row[smp])
+        for smp, val, var in zip(samples, values, variances)
+        if smp != "")
+
+non_empty_samples = st.lists(
+    st.text(min_size=1, max_size=15).map(
+        lambda s: s.strip()))
+empty_samples = st.text(
+    alphabet=" \t\n\r\f\v", min_size=1, max_size=15).filter(
+        lambda s: len(s.strip()) == 0)
+values = st.lists(st.floats())
+variances = st.lists(st.one_of(st.none(), st.floats()))
+other = st.lists(st.integers())
+
+@given(svv=st.tuples(
+    st.lists(non_empty_samples),
+    st.lists(values),
+    st.lists(variances),
+    st.lists(other)))
+def test_dictifify_by_samples_with_nonempty_samples_strings(svv):
+    """
+    Test for `dictify_by_samples`.
+
+    Given a sequence of sequences of sequences
+
+    Check for the following properties:
+    - Returns a sequence of dicts
+    - Each dicts keys correspond to its index in the zeroth sequence in the
+      top-level sequence
+    """
+    res = dictify_by_samples(svv)
+    assert (
+        isinstance(res, Sequence)
+        and all((isinstance(elt, dict) for elt in res))
+        and all(
+            check_keys(svv[0][idx], row)
+            for idx, row in enumerate(res))
+        and all(
+            check_values(svv[0][idx], svv[1][idx], svv[2][idx], row)
+            for idx, row in enumerate(res)))
+
+@pytest.mark.unit_test
+@given(svv=st.tuples(
+    st.lists(
+        st.lists(empty_samples,min_size=1),
+        min_size=1),
+    st.lists(st.lists(st.floats(), min_size=1), min_size=1),
+    st.lists(
+        st.lists(st.one_of(st.none(), st.floats()), min_size=1), min_size=1),
+    st.lists(st.lists(st.integers(), min_size=1), min_size=1)))
+def test_dictify_by_samples_with_empty_samples_strings(svv):
+    """
+    Test that `dictify_by_samples` warns the user about providing sample names
+    that are just empty strings.
+    """
+    with pytest.warns(RuntimeWarning):
+        dictify_by_samples(svv)
-- 
cgit v1.2.3