diff options
author | Muriithi Frederick Muriuki | 2021-09-08 06:52:01 +0300 |
---|---|---|
committer | Muriithi Frederick Muriuki | 2021-09-08 07:02:20 +0300 |
commit | 31ca02d1f095c2cc667e5b7d49131d702982f321 (patch) | |
tree | 734f3a379fac6f8648a9228fb6f89f30ea2952b9 | |
parent | d4943f1d01d89a3928c905f80914a23144126c8e (diff) | |
download | genenetwork3-31ca02d1f095c2cc667e5b7d49131d702982f321.tar.gz |
Fix the traits order computations for clustering
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
* gn3/computations/heatmap.py: Fix ordering function
* tests/unit/computations/test_heatmap.py: Update test
The order of the traits is important for the clustering algorithm, since the
clustering seems to use the distance of one trait from another to determine
how to order them.
This commit also removes the xoffset argument, which has no bearing on the
ordering; in the older GN1 it was used to determine how to draw the
clustering lines.
-rw-r--r-- | gn3/computations/heatmap.py | 16 | ||||
-rw-r--r-- | tests/unit/computations/test_heatmap.py | 11 |
2 files changed, 9 insertions, 18 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py index ccce385..8727c92 100644 --- a/gn3/computations/heatmap.py +++ b/gn3/computations/heatmap.py @@ -180,28 +180,24 @@ def heatmap_data(traits_names, conn: Any): "traits_filename": traits_filename } -def compute_heatmap_order( - slink_data, xoffset: int = 40, neworder: tuple = tuple()): +def compute_traits_order(slink_data, neworder: tuple = tuple()): """ - Compute the data used for drawing the heatmap proper from `slink_data`. + Compute the order of the traits for clustering from `slink_data`. This function tries to reproduce the creation and update of the `neworder` variable in https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120 and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1 """ - d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder - def __order_maker(norder, slnk_dt): if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int): - return norder + ( - (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1])) + return norder + (slnk_dt[0], slnk_dt[1]) if isinstance(slnk_dt[0], int): - return norder + ((xoffset + 20, slnk_dt[0]), ) + return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1]) if isinstance(slnk_dt[1], int): - return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), ) + return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], ) return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1]) @@ -222,7 +218,7 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list): values = [] rets = [] for order in orders: - temp_val = traits_data_list[order[1]] + temp_val = traits_data_list[order] for i, strain in enumerate(strainlist): if temp_val[i] is not None: strains.append(strain) diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py index 87f8e45..f1bbefc 100644 --- a/tests/unit/computations/test_heatmap.py +++ 
b/tests/unit/computations/test_heatmap.py @@ -3,7 +3,7 @@ from unittest import TestCase from gn3.computations.heatmap import ( cluster_traits, export_trait_data, - compute_heatmap_order, + compute_traits_order, retrieve_strains_and_values) strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] @@ -158,13 +158,8 @@ class TestHeatmap(TestCase): def test_compute_heatmap_order(self): """Test the orders.""" - for xoff, expected in [ - (40, ((60, 9), (60, 4))), - (30, ((50, 9), (50, 4))), - (20, ((40, 9), (40, 4)))]: - with self.subTest(xoffset=xoff): - self.assertEqual( - compute_heatmap_order(slinked, xoffset=xoff), expected) + self.assertEqual( + compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4)) def test_retrieve_strains_and_values(self): """Test retrieval of strains and values.""" |