about summary refs log tree commit diff
diff options
context:
space:
mode:
author Muriithi Frederick Muriuki 2021-07-23 09:18:40 +0300
committer Muriithi Frederick Muriuki 2021-07-23 09:18:40 +0300
commit 278f1a18b59b1cadd04c50a9af35b5aece4d722d (patch)
tree 7cf307f6fb0443c644fd051e51d65688c3846cf3
parent 8705e9186051fb5d11d150991c49f6f73056183b (diff)
download genenetwork3-278f1a18b59b1cadd04c50a9af35b5aece4d722d.tar.gz
Add data examples for `slink`. Implement function.
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/slink.py: Copy the function, mostly verbatim from
  genenetwork1. See:
  https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L107-L138

* tests/unit/computations/test_slink.py: Add a test with some example data to
  check that the implementation gives the same results as the one in genenetwork1.
-rw-r--r-- gn3/computations/slink.py 48
-rw-r--r-- tests/unit/computations/test_slink.py 11
2 files changed, 58 insertions, 1 deletion
diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py
index 59d0975..8d51f29 100644
--- a/gn3/computations/slink.py
+++ b/gn3/computations/slink.py
@@ -123,9 +123,55 @@ def nearest(lists, i, j):
 
 def slink(lists):
     """
+    DESCRIPTION:
+    TODO: Not quite sure what this function does. Work through the code with a
+        fine tooth comb, once we understand the context of its use, so as to
+        give a better description
+
+        The name of the function does not clearly establish what the function
+        does either, meaning, once that is established, the function should be
+        renamed to give the user an idea of what it does without necessarily
+        reading through a ton of code.
+
+        We should also look into refactoring the function to reduce/eliminate
+        the multiple levels of nested-loops and conditionals
+
+    PARAMETERS:
+    lists (list of lists of numbers): Give this a better name.
+        Each item of this list is a list of coordinates of the members in the
+        group.
+        What 'member' and 'group' in this context means, is not yet established.
     """
     try:
-        nearest(lists, 1, 2)
+        size = len(lists)
+        listindex = range(size)
+        listindexcopy = list(range(size))
+        listscopy = [[item for item in child] for child in lists]
+        initSize = size
+        candidate = []
+        while initSize >2:
+            mindist = 1e10
+            for i in range(initSize):
+                for j in range(i+1,initSize):
+                    if listscopy[i][j] < mindist:
+                        mindist =  listscopy[i][j]
+                        candidate=[[i,j]]
+                    elif listscopy[i][j] == mindist:
+                        mindist =  listscopy[i][j]
+                        candidate.append([i,j])
+                    else:
+                        pass
+            newmem = (listindexcopy[candidate[0][0]],listindexcopy[candidate[0][1]],mindist)
+            listindexcopy.pop(candidate[0][1])
+            listindexcopy[candidate[0][0]] = newmem
+
+            initSize -= 1
+            for i in range(initSize):
+                for j in range(i+1,initSize):
+                    listscopy[i][j] = nearest(lists,listindexcopy[i],listindexcopy[j])
+                    listscopy[j][i] = listscopy[i][j]
+        listindexcopy.append(nearest(lists,listindexcopy[0],listindexcopy[1]))
+        return listindexcopy
     except Exception as e:
         # TODO: Look into making the logging log output to the system's
         #    configured logger(s)
diff --git a/tests/unit/computations/test_slink.py b/tests/unit/computations/test_slink.py
index da17ac3..5627767 100644
--- a/tests/unit/computations/test_slink.py
+++ b/tests/unit/computations/test_slink.py
@@ -207,3 +207,14 @@ class TestSlink(TestCase):
         for data in [1, "test", [], 2.945, nearest, [0]]:
             with self.subTest(data=data):
                 self.assertEqual(slink(data), [])
+
+    def test_slink_with_data(self):
+        for data, expected in [
+                [[[0,9],[9,0]],[0,1,9]],
+                [[[0,9,3],[9,0,7],[3,7,0]],[(0,2,3),1,7]],
+                [[[0,9,3,6],[9,0,7,5],[3,7,0,9],[6,5,9,0]],[(0,2,3),(1,3,5),6]],
+                [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],
+                  [11,10,2,8,0]],
+                 [(0,(2,4,2),3),(1,3,5),6]]]:
+            with self.subTest(data=data):
+                self.assertEqual(slink(data), expected)