diff options
-rw-r--r-- | gn3/computations/partial_correlations.py | 29 | ||||
-rw-r--r-- | tests/unit/computations/test_dictify_by_samples.py | 113 | ||||
-rw-r--r-- | tests/unit/computations/test_partial_correlations.py | 32 |
3 files changed, 0 insertions, 174 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index e826a8b..1a37838 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -69,35 +69,6 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): [__process_control__(trait_data) for trait_data in controls], (tuple(), tuple(), tuple(), tuple())) -def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]: - """ - Build a sequence of dictionaries from a sequence of separate sequences of - samples, values and variances. - - This is a partial migration of - `web.webqtl.correlation.correlationFunction.fixStrains` function in GN1. - This implementation extracts code that will find common use, and that will - find use in more than one place. - """ - def __build_key_value_pairs__( - sample: str, value: Union[float, None], - variance: Union[float, None]) -> dict[ - str, dict[str, Union[str, float, None]]]: - smp = sample.strip() - if smp == "": - warnings.warn( - "Empty strings for sample names is not allowed. Returning None", - category=RuntimeWarning) - return None - return (smp, {"sample_name": smp, "value": value, "variance": variance}) - - return tuple( - dict(item for item in - (__build_key_value_pairs__(sample, val, var) - for sample, val, var in zip(*trait_line)) - if item is not None) - for trait_line in zip(*(samples_vals_vars[0:3]))) - def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]: """ Corrects sample_names, values and variance such that they all contain only diff --git a/tests/unit/computations/test_dictify_by_samples.py b/tests/unit/computations/test_dictify_by_samples.py deleted file mode 100644 index 5cd3eca..0000000 --- a/tests/unit/computations/test_dictify_by_samples.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Property tests for `gn3.computations.partial_correlations.dictify_by_samples` - function""" -from math import isnan -from collections.abc import Sequence - -import pytest -from hypothesis import given, strategies as st - -from gn3.computations.partial_correlations import dictify_by_samples - - -def check_keys(samples, the_dict): - """Check that all the keys in `the_dict` are strings in `samples.`""" - return all( - (key in samples) for key in the_dict.keys()) - - -def same(val1, val2): - """ - Check that values are similar. - - In Python3 `float('nan') == float('nan')` always returns False. This - function thus, compares similarity rather than direct equality for NaN - values. - - `Math.isnan(None)` would throw an error, thus this function takes advantage - of the `or` operation's short-circuit to avoid this failure in the case - where both values are NoneType values. - """ - return ( - (val1 is None and val2 is None) or - (isnan(val1) and isnan(val2)) or - (val1 == val2)) - -def check_dict_keys_and_values(sample, value, variance, the_dict): - """ - Check the following properties for each dict: - - has only `sample_name`, `value` and `variance` as the keys - - The values in the dict are the same ones in `sample`, `value` and - `variance`. - """ - return ( - all((key in ("sample_name", "value", "variance")) - for key in the_dict.keys()) and - the_dict["sample_name"] == sample and - same(the_dict["value"], value) and - same(the_dict["variance"], variance)) - -def check_values(samples, values, variances, row): - """ - Check that the values in each dict in `row` are made up from the values in - the `samples`, `values`, and `variances` sequences, skipping all values in - the `row` for which the sample name is an empty string. - """ - return all( - check_dict_keys_and_values(smp, val, var, row[smp]) - for smp, val, var in zip(samples, values, variances) - if smp != "") - -generated_non_empty_samples = st.lists( - st.text(min_size=1, max_size=15).map( - lambda s: s.strip())) -generated_empty_samples = st.text( - alphabet=" \t\n\r\f\v", min_size=1, max_size=15).filter( - lambda s: len(s.strip()) == 0) -generated_values = st.lists(st.floats()) -generated_variances = st.lists(st.one_of(st.none(), st.floats())) -generated_other = st.lists(st.integers()) - -@pytest.mark.unit_test -@given(svv=st.tuples( - st.lists(generated_non_empty_samples), - st.lists(generated_values), - st.lists(generated_variances), - st.lists(generated_other))) -def test_dictifify_by_samples_with_nonempty_samples_strings(svv): - """ - Test for `dictify_by_samples`. - - Given a sequence of sequences of sequences - - Check for the following properties: - - Returns a sequence of dicts - - Each dicts keys correspond to its index in the zeroth sequence in the - top-level sequence - """ - res = dictify_by_samples(svv) - assert ( - isinstance(res, Sequence) - and all((isinstance(elt, dict) for elt in res)) - and all( - check_keys(svv[0][idx], row) - for idx, row in enumerate(res)) - and all( - check_values(svv[0][idx], svv[1][idx], svv[2][idx], row) - for idx, row in enumerate(res))) - -@pytest.mark.unit_test -@given(svv=st.tuples( - st.lists( - st.lists(generated_empty_samples,min_size=1), - min_size=1), - st.lists(st.lists(st.floats(), min_size=1), min_size=1), - st.lists( - st.lists(st.one_of(st.none(), st.floats()), min_size=1), min_size=1), - st.lists(st.lists(st.integers(), min_size=1), min_size=1))) -def test_dictify_by_samples_with_empty_samples_strings(svv): - """ - Test that `dictify_by_samples` warns the user about providing sample names - that are just empty strings. - """ - with pytest.warns(RuntimeWarning): - dictify_by_samples(svv) diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py index ee17659..81ddbcc 100644 --- a/tests/unit/computations/test_partial_correlations.py +++ b/tests/unit/computations/test_partial_correlations.py @@ -10,7 +10,6 @@ from gn3.computations.partial_correlations import ( fix_samples, control_samples, build_data_frame, - dictify_by_samples, tissue_correlation, find_identical_traits, good_dataset_samples_indexes) @@ -115,37 +114,6 @@ class TestPartialCorrelations(TestCase): (6, 4, 3))) @pytest.mark.unit_test - def test_dictify_by_samples(self): - """ - Test that `dictify_by_samples` generates the appropriate dict - - Given: - a sequence of sequences with sample names, values and variances, as - in the output of `gn3.partial_correlations.control_samples` or - the output of `gn3.db.traits.export_informative` - When: - the sequence is passed as an argument into the - `gn3.partial_correlations.dictify_by_sample` - Then: - return a sequence of dicts with keys being the values of the sample - names, and each of who's values being sub-dicts with the keys - 'sample_name', 'value' and 'variance' whose values correspond to the - values passed in. - """ - self.assertEqual( - dictify_by_samples( - ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"), - ("BXD12", "BXD16", "BXD19", "BXD2"), - ("B6cC3-1", "BXD1", "BXD2")), - ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944), - (8.39265, 8.17443, 8.30401, 7.80944), - (7.51879, 7.77141, 7.80944)), - ((None, None, None, None, None, None), (None, None, None, None), - (None, None, None)), - (6, 4, 3))), - dictified_control_samples) - - @pytest.mark.unit_test def test_fix_samples(self): """ Test that `fix_samples` returns only the common samples |