aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations/slink.py
blob: 5953e6b7cb09f7f15eda73e116019a980afa7268 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""
DESCRIPTION:
    TODO: Add a description for the module

FUNCTIONS:
slink:
    TODO: Describe what the function does...
"""
import logging
from typing import Union, Sequence

NumType = Union[int, float]
SeqOfNums = Sequence[NumType]

class LengthError(BaseException):
    """Raised whenever child lists/tuples are not the same length as the parent
    list of tuple."""

class MirrorError(BaseException):
    """Raised if the distance from child A to child B is not the same as the
    distance from child B to child A."""

def __is_list_or_tuple(item):
    return type(item) in [list, tuple]

def __raise_valueerror_if_data_is_not_lists_or_tuples(lists):
    """Check that `lists` is a list of lists: If not, raise an exception."""

    if (not __is_list_or_tuple(lists)) or (not all(map(__is_list_or_tuple, lists))):
        raise ValueError("Expected list or tuple")

def __raise_valueerror_if_lists_empty(lists):
    """Check that the list and its direct children are not empty."""
    def empty(lst):
        return len(lst) == 0
    if (empty(lists)) or not all(map(lambda x: not empty(x), lists)):
        raise ValueError("List/Tuple should NOT be empty!")

def __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists):
    def len_is_same_as_parent(lst):
        return len(lst) == len(lists)
    if not all(map(len_is_same_as_parent, lists)):
        raise LengthError("All children lists should be same length as the parent.")

def __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists):
    def get_child_distance(child):
        idx = lists.index(child)
        return lists[idx][idx]
    def distance_is_zero(dist):
        return dist == 0
    children_distances = map(get_child_distance, lists)
    if not all(map(distance_is_zero, children_distances)):
        raise ValueError("Distance of each child list/tuple from itself should be zero!")

def __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists):
    """Check that the distance from A to B, is the same as the distance from B to A.
If the two distances are different, throw an exception."""
    inner_coords = range(len(lists))
    coords = ((i, j) for i in inner_coords for j in inner_coords)
    def __is_same_reversed(coord):
        return lists[coord[0]][coord[1]] == lists[coord[1]][coord[0]]
    if not all(map(__is_same_reversed, coords)):
        raise MirrorError((
            "Distance from one child to the other should be the same in both "
            "directions."))

def __raise_valueerror_on_negative_distances(lists):
    """Check that distances between 'somethings' are all positive, otherwise,
raise an exception."""
    def zero_or_positive(val):
        return val >= 0
    # flatten lists
    flattened = __flatten_list_of_lists(lists)
    if not all(map(zero_or_positive, flattened)):
        raise ValueError("Distances should be positive.")

def __flatten_list_of_lists(parent):
    return [item for child in parent for item in child]

# i and j are Union[SeqOfNums, NumType], but that leads to errors where the
# values of i or j are indexed, since the NumType type is not indexable.
# I don't know how to type this so that it does not fail on running `mypy .`
def nearest(lists: Sequence[SeqOfNums], i, j) -> NumType:
    """
    Computes shortest distance between member(s) in `i` and member(s) in `j`.

    Description:
    This is 'copied' over from genenetwork1, from
    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64.

    This description should be updated to better describe what 'member' means in
    the context where the function is used.

    Parameters:
    lists (list of lists of distances): Represents a list of members and their
        distances from each other.
        Each inner list represents the distances the member at that coordinate
        is from other members in the list: for example, a member at index 0 with
        the values [0, 9, 1, 7] indicates that the member is:
        - 0 units of distance away from itself
        - 9 units of distance away from member at coordinate 1
        - 1 unit of distance away from member at coordinate 2
        - 7 units of distance away from member at coordinate 3
    i (int or list of ints): Represents the coordinate of a member, or a list of
        coordinates of members on the `lists` list.
    j (int or list of ints): Represents the coordinate of a member, or a list of
        coordinates of members on the `lists` list.

    Returns:
    int: Represents the shortest distance between member(s) in `i` and member(s)
        in `j`."""

    #### Guard Functions: Should we do this a different way? ####
    __raise_valueerror_if_data_is_not_lists_or_tuples(lists)
    __raise_valueerror_if_lists_empty(lists)
    __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists)
    __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists)
    __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists)
    __raise_valueerror_on_negative_distances(lists)
    #### END: Guard Functions ####
    if isinstance(i, int) and isinstance(j, int): # From member i to member j
        return lists[i][j]

    if isinstance(i, int) and __is_list_or_tuple(j):
        return min(map(lambda j_new: nearest(lists, i, j_new), j[:-1]))
    if isinstance(j, int) and __is_list_or_tuple(i):
        return min(map(lambda i_new: nearest(lists, i_new, j), i[:-1]))

    if __is_list_or_tuple(i) and __is_list_or_tuple(j):
        coordinate_pairs = __flatten_list_of_lists(
            [[(itemi, itemj) for itemj in j[:-1]] for itemi in i[:-1]])
        return min(map(lambda x: nearest(lists, x[0], x[1]), coordinate_pairs))

    raise ValueError("member values (i or j) should be lists/tuples of integers or integers")

# `lists` here could be Sequence[SeqOfNums], but that leads to errors I do not
# understand down the line
# Might have to re-implement the function especially since the errors are thrown
# where `listindexcopy` is involved
def slink(lists):
    """
    DESCRIPTION:
    TODO: Not quite sure what this function does. Work through the code with a
        fine tooth comb, once we understand the context of its use, so as to
        give a better description

        The name of the function does not clearly establish what the function
        does either, meaning, once that is established, the function should be
        renamed to give the user an idea of what it does without necessarily
        reading through a ton of code.

        We should also look into refactoring the function to reduce/eliminate
        the multiple levels of nested-loops and conditionals

    PARAMETERS:
    lists (list of lists of numbers): Give this a better name.
        Each item of this list is a list of coordinates of the members in the
        group.
        What 'member' and 'group' in this context means, is not yet established.
    """
    try:
        size = len(lists)
        listindexcopy = list(range(size))
        listscopy = [child[:] for child in lists]
        init_size = size
        candidate = []
        while init_size > 2:
            mindist = 1e10
            for i in range(init_size):
                for j in range(i+1, init_size):
                    if listscopy[i][j] < mindist:
                        mindist = listscopy[i][j]
                        candidate = [[i, j]]
                    elif listscopy[i][j] == mindist:
                        mindist = listscopy[i][j]
                        candidate.append([i, j])
                    else:
                        pass
            newmem = (
                listindexcopy[candidate[0][0]], listindexcopy[candidate[0][1]],
                mindist)
            listindexcopy.pop(candidate[0][1])
            listindexcopy[candidate[0][0]] = newmem

            init_size -= 1
            for i in range(init_size):
                for j in range(i+1, init_size):
                    listscopy[i][j] = nearest(
                        lists, listindexcopy[i], listindexcopy[j])
                    listscopy[j][i] = listscopy[i][j]
        listindexcopy.append(
            nearest(lists, listindexcopy[0], listindexcopy[1]))
        return listindexcopy
    except (LengthError, MirrorError, TypeError, IndexError) as exc:
        # Look into making the logging log output to the system's
        #   configured logger(s)
        logging.warning("Exception: %s, %s", type(exc), exc)
        return []