1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
"""
DESCRIPTION:
TODO: Add a description for the module
FUNCTIONS:
slink:
TODO: Describe what the function does...
"""
import logging
from typing import List, Tuple, Union, Sequence
NumType = Union[int, float]
SeqOfNums = Sequence[NumType]
class LengthError(BaseException):
"""Raised whenever child lists/tuples are not the same length as the parent
list of tuple."""
class MirrorError(BaseException):
"""Raised if the distance from child A to child B is not the same as the
distance from child B to child A."""
def __is_list_or_tuple(item):
return type(item) in [list, tuple]
def __raise_valueerror_if_data_is_not_lists_or_tuples(lists):
"""Check that `lists` is a list of lists: If not, raise an exception."""
if (not __is_list_or_tuple(lists)) or (not all(map(__is_list_or_tuple, lists))):
raise ValueError("Expected list or tuple")
def __raise_valueerror_if_lists_empty(lists):
"""Check that the list and its direct children are not empty."""
def empty(lst):
return len(lst) == 0
if (empty(lists)) or not all(map(lambda x: not empty(x), lists)):
raise ValueError("List/Tuple should NOT be empty!")
def __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists):
def len_is_same_as_parent(lst):
return len(lst) == len(lists)
if not all(map(len_is_same_as_parent, lists)):
raise LengthError("All children lists should be same length as the parent.")
def __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists):
def get_child_distance(child):
idx = lists.index(child)
return lists[idx][idx]
def distance_is_zero(dist):
return dist == 0
children_distances = map(get_child_distance, lists)
if not all(map(distance_is_zero, children_distances)):
raise ValueError("Distance of each child list/tuple from itself should be zero!")
def __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists):
"""Check that the distance from A to B, is the same as the distance from B to A.
If the two distances are different, throw an exception."""
inner_coords = range(len(lists))
coords = ((i, j) for i in inner_coords for j in inner_coords)
def __is_same_reversed(coord):
return lists[coord[0]][coord[1]] == lists[coord[1]][coord[0]]
if not all(map(__is_same_reversed, coords)):
raise MirrorError((
"Distance from one child to the other should be the same in both "
"directions."))
def __raise_valueerror_on_negative_distances(lists):
"""Check that distances between 'somethings' are all positive, otherwise,
raise an exception."""
def zero_or_positive(val):
return val >= 0
# flatten lists
flattened = __flatten_list_of_lists(lists)
if not all(map(zero_or_positive, flattened)):
raise ValueError("Distances should be positive.")
def __flatten_list_of_lists(parent):
return [item for child in parent for item in child]
# i and j are Union[SeqOfNums, NumType], but that leads to errors where the
# values of i or j are indexed, since the NumType type is not indexable.
# I don't know how to type this so that it does not fail on running `mypy .`
def nearest(lists: Sequence[SeqOfNums], i, j) -> NumType:
"""
Computes shortest distance between member(s) in `i` and member(s) in `j`.
Description:
This is 'copied' over from genenetwork1, from
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64.
This description should be updated to better describe what 'member' means in
the context where the function is used.
Parameters:
lists (list of lists of distances): Represents a list of members and their
distances from each other.
Each inner list represents the distances the member at that coordinate
is from other members in the list: for example, a member at index 0 with
the values [0, 9, 1, 7] indicates that the member is:
- 0 units of distance away from itself
- 9 units of distance away from member at coordinate 1
- 1 unit of distance away from member at coordinate 2
- 7 units of distance away from member at coordinate 3
i (int or list of ints): Represents the coordinate of a member, or a list of
coordinates of members on the `lists` list.
j (int or list of ints): Represents the coordinate of a member, or a list of
coordinates of members on the `lists` list.
Returns:
int: Represents the shortest distance between member(s) in `i` and member(s)
in `j`."""
#### Guard Functions: Should we do this a different way? ####
__raise_valueerror_if_data_is_not_lists_or_tuples(lists)
__raise_valueerror_if_lists_empty(lists)
__raise_lengtherror_if_child_lists_are_not_same_as_parent(lists)
__raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists)
__raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists)
__raise_valueerror_on_negative_distances(lists)
#### END: Guard Functions ####
if isinstance(i, int) and isinstance(j, int): # From member i to member j
return lists[i][j]
if isinstance(i, int) and __is_list_or_tuple(j):
return min(map(lambda j_new: nearest(lists, i, j_new), j[:-1]))
if isinstance(j, int) and __is_list_or_tuple(i):
return min(map(lambda i_new: nearest(lists, i_new, j), i[:-1]))
if __is_list_or_tuple(i) and __is_list_or_tuple(j):
coordinate_pairs = __flatten_list_of_lists(
[[(itemi, itemj) for itemj in j[:-1]] for itemi in i[:-1]])
return min(map(lambda x: nearest(lists, x[0], x[1]), coordinate_pairs))
raise ValueError("member values (i or j) should be lists/tuples of integers or integers")
# `lists` here could be Sequence[SeqOfNums], but that leads to errors I do not
# understand down the line
# Might have to re-implement the function especially since the errors are thrown
# where `listindexcopy` is involved
def slink(lists):
"""
DESCRIPTION:
TODO: Not quite sure what this function does. Work through the code with a
fine tooth comb, once we understand the context of its use, so as to
give a better description
The name of the function does not clearly establish what the function
does either, meaning, once that is established, the function should be
renamed to give the user an idea of what it does without necessarily
reading through a ton of code.
We should also look into refactoring the function to reduce/eliminate
the multiple levels of nested-loops and conditionals
PARAMETERS:
lists (list of lists of numbers): Give this a better name.
Each item of this list is a list of coordinates of the members in the
group.
What 'member' and 'group' in this context means, is not yet established.
"""
try:
size = len(lists)
listindexcopy = list(range(size))
listscopy = [child[:] for child in lists]
init_size = size
candidate = []
while init_size > 2:
mindist = 1e10
for i in range(init_size):
for j in range(i+1, init_size):
if listscopy[i][j] < mindist:
mindist = listscopy[i][j]
candidate = [[i, j]]
elif listscopy[i][j] == mindist:
mindist = listscopy[i][j]
candidate.append([i, j])
else:
pass
newmem = (
listindexcopy[candidate[0][0]], listindexcopy[candidate[0][1]],
mindist)
listindexcopy.pop(candidate[0][1])
listindexcopy[candidate[0][0]] = newmem
init_size -= 1
for i in range(init_size):
for j in range(i+1, init_size):
listscopy[i][j] = nearest(
lists, listindexcopy[i], listindexcopy[j])
listscopy[j][i] = listscopy[i][j]
listindexcopy.append(
nearest(lists, listindexcopy[0], listindexcopy[1]))
return listindexcopy
except (LengthError, MirrorError, TypeError, IndexError) as exc:
# Look into making the logging log output to the system's
# configured logger(s)
logging.warning("Exception: %s, %s", type(exc), exc)
return []
|