"""Property tests for `gn3.computations.partial_correlations.dictify_by_samples`
 function"""
from math import isnan
from collections.abc import Sequence

import pytest
from hypothesis import given, strategies as st

from gn3.computations.partial_correlations import dictify_by_samples


def check_keys(samples, the_dict):
    """
    Return True when every key of `the_dict` is contained in `samples`.

    An empty `the_dict` trivially passes the check.
    """
    for sample_name in the_dict:
        if sample_name not in samples:
            return False
    return True


def same(val1, val2):
    """
    Check that values are similar.

    In Python3 `float('nan') == float('nan')` always returns False. This
    function thus compares similarity rather than direct equality for NaN
    values: two NaN values are considered the same.

    `math.isnan(None)` raises a `TypeError`, so any `None` argument is handled
    before `isnan` is ever called: the values are then similar only when both
    of them are `None`.

    Returns True when the values are similar, False otherwise.
    """
    if val1 is None or val2 is None:
        # Never let a None reach `isnan`; a None only matches another None.
        return val1 is None and val2 is None
    return (isnan(val1) and isnan(val2)) or val1 == val2

def check_dict_keys_and_values(sample, value, variance, the_dict):
    """
    Verify a single per-sample dict.

    The dict passes when:
    - none of its keys fall outside `sample_name`, `value` and `variance`,
    - its `sample_name` entry equals `sample`, and
    - its `value` and `variance` entries are the same (as judged by `same`)
      as the `value` and `variance` arguments.

    The checks run in that order and stop at the first one that fails.
    """
    allowed_keys = ("sample_name", "value", "variance")
    if any(key not in allowed_keys for key in the_dict):
        return False
    if the_dict["sample_name"] != sample:
        return False
    return (
        same(the_dict["value"], value) and
        same(the_dict["variance"], variance))

def check_values(samples, values, variances, row):
    """
    Verify the dicts in `row` against the data they were built from.

    `samples`, `values` and `variances` are walked in lockstep; for each
    sample name that is not an empty string, the dict stored under that name
    in `row` must pass `check_dict_keys_and_values`. Entries whose sample name
    is an empty string are skipped.
    """
    for name, val, var in zip(samples, values, variances):
        if name == "":
            continue
        if not check_dict_keys_and_values(name, val, var, row[name]):
            return False
    return True

# Sample names for one trait: a list of texts (1 to 15 characters before
# stripping) with surrounding whitespace removed.
# NOTE(review): stripping presumably can still yield an empty string when the
# generated text is whitespace-only; `check_values` skips such names.
generated_non_empty_samples = st.lists(
    st.text(min_size=1, max_size=15).map(str.strip))
# A single sample name drawn only from whitespace characters, kept only if
# nothing is left once it is stripped.
generated_empty_samples = st.text(
    alphabet=" \t\n\r\f\v", min_size=1, max_size=15
).filter(lambda name: not name.strip())
# Values are floats; variances may additionally be None.
generated_values = st.lists(st.floats())
generated_variances = st.lists(st.one_of(st.none(), st.floats()))
# Integers used as the fourth element of the tuple given to the tests.
generated_other = st.lists(st.integers())

@pytest.mark.unit_test
@given(svv=st.tuples(
    st.lists(generated_non_empty_samples),
    st.lists(generated_values),
    st.lists(generated_variances),
    st.lists(generated_other)))
def test_dictifify_by_samples_with_nonempty_samples_strings(svv):
    """
    Test for `dictify_by_samples`.

    Given a tuple of four lists of lists (sample names, values, variances and
    integers), check the following properties of the result:
    - It is a sequence
    - Every element of that sequence is a dict
    - The keys of the dict at index `idx` all appear in the `idx`-th list of
      sample names
    - The dict at index `idx` passes `check_values` against the `idx`-th
      lists of sample names, values and variances
    """
    res = dictify_by_samples(svv)
    sample_rows, value_rows, variance_rows = svv[0], svv[1], svv[2]

    assert isinstance(res, Sequence)
    assert all(isinstance(elt, dict) for elt in res)
    assert all(
        check_keys(sample_rows[idx], row) for idx, row in enumerate(res))
    assert all(
        check_values(
            sample_rows[idx], value_rows[idx], variance_rows[idx], row)
        for idx, row in enumerate(res))

@pytest.mark.unit_test
@given(svv=st.tuples(
    st.lists(st.lists(generated_empty_samples, min_size=1), min_size=1),
    st.lists(st.lists(st.floats(), min_size=1), min_size=1),
    st.lists(
        st.lists(st.one_of(st.none(), st.floats()), min_size=1),
        min_size=1),
    st.lists(st.lists(st.integers(), min_size=1), min_size=1)))
def test_dictify_by_samples_with_empty_samples_strings(svv):
    """
    Check that `dictify_by_samples` emits a `RuntimeWarning` when the sample
    names it is given are whitespace-only strings.

    Every generated list has at least one element, so each input holds at
    least one such sample name.
    """
    with pytest.warns(RuntimeWarning):
        dictify_by_samples(svv)