aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gn3/computations/partial_correlations.py29
-rw-r--r--tests/unit/computations/test_dictify_by_samples.py113
-rw-r--r--tests/unit/computations/test_partial_correlations.py32
3 files changed, 0 insertions, 174 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index e826a8b..1a37838 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -69,35 +69,6 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
[__process_control__(trait_data) for trait_data in controls],
(tuple(), tuple(), tuple(), tuple()))
-def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
- """
- Build a sequence of dictionaries from a sequence of separate sequences of
- samples, values and variances.
-
- This is a partial migration of
- `web.webqtl.correlation.correlationFunction.fixStrains` function in GN1.
- This implementation extracts code that will find common use, and that will
- find use in more than one place.
- """
- def __build_key_value_pairs__(
- sample: str, value: Union[float, None],
- variance: Union[float, None]) -> dict[
- str, dict[str, Union[str, float, None]]]:
- smp = sample.strip()
- if smp == "":
- warnings.warn(
- "Empty strings for sample names is not allowed. Returning None",
- category=RuntimeWarning)
- return None
- return (smp, {"sample_name": smp, "value": value, "variance": variance})
-
- return tuple(
- dict(item for item in
- (__build_key_value_pairs__(sample, val, var)
- for sample, val, var in zip(*trait_line))
- if item is not None)
- for trait_line in zip(*(samples_vals_vars[0:3])))
-
def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
"""
Corrects sample_names, values and variance such that they all contain only
diff --git a/tests/unit/computations/test_dictify_by_samples.py b/tests/unit/computations/test_dictify_by_samples.py
deleted file mode 100644
index 5cd3eca..0000000
--- a/tests/unit/computations/test_dictify_by_samples.py
+++ /dev/null
@@ -1,113 +0,0 @@
-"""Property tests for `gn3.computations.partial_correlations.dictify_by_samples`
- function"""
-from math import isnan
-from collections.abc import Sequence
-
-import pytest
-from hypothesis import given, strategies as st
-
-from gn3.computations.partial_correlations import dictify_by_samples
-
-
-def check_keys(samples, the_dict):
- """Check that all the keys in `the_dict` are strings in `samples.`"""
- return all(
- (key in samples) for key in the_dict.keys())
-
-
-def same(val1, val2):
- """
- Check that values are similar.
-
- In Python3 `float('nan') == float('nan')` always returns False. This
- function thus, compares similarity rather than direct equality for NaN
- values.
-
- `Math.isnan(None)` would throw an error, thus this function takes advantage
- of the `or` operation's short-circuit to avoid this failure in the case
- where both values are NoneType values.
- """
- return (
- (val1 is None and val2 is None) or
- (isnan(val1) and isnan(val2)) or
- (val1 == val2))
-
-def check_dict_keys_and_values(sample, value, variance, the_dict):
- """
- Check the following properties for each dict:
- - has only `sample_name`, `value` and `variance` as the keys
- - The values in the dict are the same ones in `sample`, `value` and
- `variance`.
- """
- return (
- all((key in ("sample_name", "value", "variance"))
- for key in the_dict.keys()) and
- the_dict["sample_name"] == sample and
- same(the_dict["value"], value) and
- same(the_dict["variance"], variance))
-
-def check_values(samples, values, variances, row):
- """
- Check that the values in each dict in `row` are made up from the values in
- the `samples`, `values`, and `variances` sequences, skipping all values in
- the `row` for which the sample name is an empty string.
- """
- return all(
- check_dict_keys_and_values(smp, val, var, row[smp])
- for smp, val, var in zip(samples, values, variances)
- if smp != "")
-
-generated_non_empty_samples = st.lists(
- st.text(min_size=1, max_size=15).map(
- lambda s: s.strip()))
-generated_empty_samples = st.text(
- alphabet=" \t\n\r\f\v", min_size=1, max_size=15).filter(
- lambda s: len(s.strip()) == 0)
-generated_values = st.lists(st.floats())
-generated_variances = st.lists(st.one_of(st.none(), st.floats()))
-generated_other = st.lists(st.integers())
-
-@pytest.mark.unit_test
-@given(svv=st.tuples(
- st.lists(generated_non_empty_samples),
- st.lists(generated_values),
- st.lists(generated_variances),
- st.lists(generated_other)))
-def test_dictifify_by_samples_with_nonempty_samples_strings(svv):
- """
- Test for `dictify_by_samples`.
-
- Given a sequence of sequences of sequences
-
- Check for the following properties:
- - Returns a sequence of dicts
- - Each dicts keys correspond to its index in the zeroth sequence in the
- top-level sequence
- """
- res = dictify_by_samples(svv)
- assert (
- isinstance(res, Sequence)
- and all((isinstance(elt, dict) for elt in res))
- and all(
- check_keys(svv[0][idx], row)
- for idx, row in enumerate(res))
- and all(
- check_values(svv[0][idx], svv[1][idx], svv[2][idx], row)
- for idx, row in enumerate(res)))
-
-@pytest.mark.unit_test
-@given(svv=st.tuples(
- st.lists(
- st.lists(generated_empty_samples,min_size=1),
- min_size=1),
- st.lists(st.lists(st.floats(), min_size=1), min_size=1),
- st.lists(
- st.lists(st.one_of(st.none(), st.floats()), min_size=1), min_size=1),
- st.lists(st.lists(st.integers(), min_size=1), min_size=1)))
-def test_dictify_by_samples_with_empty_samples_strings(svv):
- """
- Test that `dictify_by_samples` warns the user about providing sample names
- that are just empty strings.
- """
- with pytest.warns(RuntimeWarning):
- dictify_by_samples(svv)
diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py
index ee17659..81ddbcc 100644
--- a/tests/unit/computations/test_partial_correlations.py
+++ b/tests/unit/computations/test_partial_correlations.py
@@ -10,7 +10,6 @@ from gn3.computations.partial_correlations import (
fix_samples,
control_samples,
build_data_frame,
- dictify_by_samples,
tissue_correlation,
find_identical_traits,
good_dataset_samples_indexes)
@@ -115,37 +114,6 @@ class TestPartialCorrelations(TestCase):
(6, 4, 3)))
@pytest.mark.unit_test
- def test_dictify_by_samples(self):
- """
- Test that `dictify_by_samples` generates the appropriate dict
-
- Given:
- a sequence of sequences with sample names, values and variances, as
- in the output of `gn3.partial_correlations.control_samples` or
- the output of `gn3.db.traits.export_informative`
- When:
- the sequence is passed as an argument into the
- `gn3.partial_correlations.dictify_by_sample`
- Then:
- return a sequence of dicts with keys being the values of the sample
- names, and each of who's values being sub-dicts with the keys
- 'sample_name', 'value' and 'variance' whose values correspond to the
- values passed in.
- """
- self.assertEqual(
- dictify_by_samples(
- ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"),
- ("BXD12", "BXD16", "BXD19", "BXD2"),
- ("B6cC3-1", "BXD1", "BXD2")),
- ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
- (8.39265, 8.17443, 8.30401, 7.80944),
- (7.51879, 7.77141, 7.80944)),
- ((None, None, None, None, None, None), (None, None, None, None),
- (None, None, None)),
- (6, 4, 3))),
- dictified_control_samples)
-
- @pytest.mark.unit_test
def test_fix_samples(self):
"""
Test that `fix_samples` returns only the common samples