From 278f1a18b59b1cadd04c50a9af35b5aece4d722d Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Fri, 23 Jul 2021 09:18:40 +0300 Subject: Add data examples for `slink`. Implement function. Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/computations/slink.py: Copy the function, mostly verbatim from genenetwork1. See: https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L107-L138 * tests/unit/computations/test_slink.py: Add a test with some example data to test that the implementation gives the same results as that in genenetwork1 --- gn3/computations/slink.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'gn3/computations/slink.py') diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py index 59d0975..8d51f29 100644 --- a/gn3/computations/slink.py +++ b/gn3/computations/slink.py @@ -123,9 +123,55 @@ def nearest(lists, i, j): def slink(lists): """ + DESCRIPTION: + TODO: Not quite sure what this function does. Work through the code with a + fine-tooth comb, once we understand the context of its use, so as to + give a better description. + + The name of the function does not clearly establish what the function + does either; once that is established, the function should be + renamed to give the user an idea of what it does without necessarily + reading through a ton of code. + + We should also look into refactoring the function to reduce/eliminate + the multiple levels of nested loops and conditionals. + + PARAMETERS: + lists (list of lists of numbers): Give this a better name. + Each item of this list is a list of coordinates of the members in the + group. + What 'member' and 'group' mean in this context is not yet established. 
""" try: - nearest(lists, 1, 2) + size = len(lists) + listindex = range(size) + listindexcopy = list(range(size)) + listscopy = [[item for item in child] for child in lists] + initSize = size + candidate = [] + while initSize >2: + mindist = 1e10 + for i in range(initSize): + for j in range(i+1,initSize): + if listscopy[i][j] < mindist: + mindist = listscopy[i][j] + candidate=[[i,j]] + elif listscopy[i][j] == mindist: + mindist = listscopy[i][j] + candidate.append([i,j]) + else: + pass + newmem = (listindexcopy[candidate[0][0]],listindexcopy[candidate[0][1]],mindist) + listindexcopy.pop(candidate[0][1]) + listindexcopy[candidate[0][0]] = newmem + + initSize -= 1 + for i in range(initSize): + for j in range(i+1,initSize): + listscopy[i][j] = nearest(lists,listindexcopy[i],listindexcopy[j]) + listscopy[j][i] = listscopy[i][j] + listindexcopy.append(nearest(lists,listindexcopy[0],listindexcopy[1])) + return listindexcopy except Exception as e: # TODO: Look into making the logging log output to the system's # configured logger(s) -- cgit v1.2.3