gn3/utility/corr_result_helpers.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

"""module contains helper function for corr results"""

#pylint:disable=C0103
#above disable snake_case for variable tod refactor
def normalize_values(a_values, b_values):
    """
    Trim two lists of values to contain only the values they both share

    Given two lists of sample values, trim each list so that it contains
    only the samples that contain a value in both lists. Also returns
    the number of such samples.

    >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
    ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)

    """
    a_new = []
    b_new = []
    for a, b in zip(a_values, b_values):
        if (a and b is not None):
            a_new.append(a)
            b_new.append(b)
    return a_new, b_new, len(a_new)


def common_keys(a_samples, b_samples):
    """
    >>> a = dict(BXD1 = 9.113, BXD2 = 9.825, BXD14 = 8.985, BXD15 = 9.300)
    >>> b = dict(BXD1 = 9.723, BXD3 = 9.825, BXD14 = 9.124, BXD16 = 9.300)
    >>> sorted(common_keys(a, b))
    ['BXD1', 'BXD14']
    """
    return set(a_samples.keys()).intersection(set(b_samples.keys()))


def normalize_values_with_samples(a_samples, b_samples):
    """function to normalize values with samples"""
    common_samples = common_keys(a_samples, b_samples)
    a_new = {}
    b_new = {}
    for sample in common_samples:
        a_new[sample] = a_samples[sample]
        b_new[sample] = b_samples[sample]

    return a_new, b_new, len(a_new)