diff options
author | Muriithi Frederick Muriuki | 2021-08-17 11:43:32 +0300 |
---|---|---|
committer | Muriithi Frederick Muriuki | 2021-08-17 11:43:32 +0300 |
commit | 41fc5136914548710529cbed7ef370dfb5b4a5c8 (patch) | |
tree | 53991e24c494420514e054335cf661ca3cfba46f | |
parent | d491be2057843921cc67bd1c4b1ae612d9f15d34 (diff) | |
download | genenetwork3-41fc5136914548710529cbed7ef370dfb5b4a5c8.tar.gz |
Test the clustering
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
* gn3/computations/heatmap.py: Fix clustering bugs
* tests/unit/computations/test_heatmap.py: Add new tests. Fix linting issues.
Test and fix the clustering function.
-rw-r--r-- | gn3/computations/heatmap.py | 14 |
-rw-r--r-- | tests/unit/computations/test_heatmap.py | 109 |
2 files changed, 106 insertions, 17 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py index 8a86fe8..3c35029 100644 --- a/gn3/computations/heatmap.py +++ b/gn3/computations/heatmap.py @@ -110,13 +110,13 @@ def cluster_traits(traits_data_list: Sequence[Dict]): https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162 """ def __compute_corr(tdata_i, tdata_j): - if tdata_j[0] < tdata_i[0]: - corr_vals = compute_correlation(tdata_i, tdata_j) - corr = corr_vals[0] - if (1 - corr) < 0: - return 0.0 - return 1 - corr - return 0.0 + if tdata_i[0] == tdata_j[0]: + return 0.0 + corr_vals = compute_correlation(tdata_i[1], tdata_j[1]) + corr = corr_vals[0] + if (1 - corr) < 0: + return 0.0 + return 1 - corr def __cluster(tdata_i): return tuple( diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py index 78303ae..650cb45 100644 --- a/tests/unit/computations/test_heatmap.py +++ b/tests/unit/computations/test_heatmap.py @@ -1,9 +1,38 @@ """Module contains tests for gn3.computations.heatmap""" from unittest import TestCase -from gn3.computations.heatmap import export_trait_data +from gn3.computations.heatmap import cluster_traits, export_trait_data strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] -trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, 
"ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} +trait_data = { + "mysqlid": 36688172, + "data": { + "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, + "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, + "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, + "BXD16": {"strain_name": 
"BXD16", "value": 8.17443, "variance": None, "ndata": None}, + "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, + "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, + "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, + "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, + "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, + "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, + "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, + "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, + "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, + "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, + "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, + "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, + "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, + "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, + "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, + "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, + "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, + "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, + "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, + "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, + "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, + "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, + 
"DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} class TestHeatmap(TestCase): """Class for testing heatmap computation functions""" @@ -29,10 +58,14 @@ class TestHeatmap(TestCase): argument and the different flags set up """ for dtype, vflag, nflag, expected in [ - ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], ["var", False, False, (None, None, None, None, None, None)], ["var", False, True, (None, None, None, None, None, None)], ["var", True, False, (None, None, None, None, None, None)], @@ -41,10 +74,17 @@ class TestHeatmap(TestCase): ["N", False, True, (None, None, None, None, None, None)], ["N", True, False, (None, None, None, None, None, None)], ["N", True, True, (None, None, None, None, None, None)], - ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)], - ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)], - ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] + ["all", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["all", False, True, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, 
None, 7.80944, None)], + ["all", True, False, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, True, + (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, + 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] ]: with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): self.assertEqual( @@ -52,3 +92,52 @@ class TestHeatmap(TestCase): trait_data, strainlist, dtype=dtype, var_exists=vflag, n_exists=nflag), expected) + + def test_cluster_traits(self): + """ + Test that the clustering is working as expected. + """ + traits_data_list = [ + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944), + (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398), + (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077), + (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731), + (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505), + (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815), + (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544), + (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991), + (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615), + (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)] + self.assertEqual( + cluster_traits(traits_data_list), + ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245, + 1.5025235756329178, 0.6952839500255574, 1.271661230252733, + 0.2100487290977544, 1.4699690641062024, 0.7934461515867415), + (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204, + 1.4815755944537086, 0.26087293140686374, 1.6939790104301427, + 0.06024619831474998, 1.7430082449189215, 0.4497104244247795), + (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234, + 1.0396738891139845, 0.5278328671176757, 1.6275069061182947, + 0.2636503792482082, 1.739617877037615, 0.7127042590637039), + (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0, + 0.9936846292920328, 1.1169999189889366, 0.6007483980555253, + 1.430209221053372, 
0.25879514152086425, 0.9313185954797953), + (1.5025235756329178, 1.4815755944537086, 1.0396738891139845, + 0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244, + 1.4122477962364253, 0.8968250491499363, 1.1683723389247052), + (0.6952839500255574, 0.26087293140686374, 0.5278328671176757, + 1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269, + 0.19179284676938602, 1.4875072385631605, 0.23451785425383564), + (1.271661230252733, 1.6939790104301427, 1.6275069061182947, + 0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0, + 1.6540234785929928, 0.2140799896286565, 1.7413442197913358), + (0.2100487290977544, 0.06024619831474998, 0.2636503792482082, + 1.430209221053372, 1.4122477962364253, 0.19179284676938602, + 1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485), + (1.4699690641062024, 1.7430082449189215, 1.739617877037615, + 0.25879514152086425, 0.8968250491499363, 1.4875072385631605, + 0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216), + (0.7934461515867415, 0.4497104244247795, 0.7127042590637039, + 0.9313185954797953, 1.1683723389247052, 0.23451785425383564, + 1.7413442197913358, 0.33370067057028485, 1.3256191648260216, + 0.0))) |