aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Muriithi Frederick Muriuki 2021-09-08 06:52:01 +0300
committer Muriithi Frederick Muriuki 2021-09-08 07:02:20 +0300
commit 31ca02d1f095c2cc667e5b7d49131d702982f321 (patch)
tree 734f3a379fac6f8648a9228fb6f89f30ea2952b9
parent d4943f1d01d89a3928c905f80914a23144126c8e (diff)
download genenetwork3-31ca02d1f095c2cc667e5b7d49131d702982f321.tar.gz
Fix the traits order computations for clustering
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Fix ordering function
* tests/unit/computations/test_heatmap.py: update test

The order of the traits is important for the clustering algorithm, since the clustering seems to use the distance of one trait from another to determine how to order them.

This commit also gets rid of the xoffset argument, which is not important to the ordering and was used in the older GN1 to determine how to draw the clustering lines.
-rw-r--r-- gn3/computations/heatmap.py 16
-rw-r--r-- tests/unit/computations/test_heatmap.py 11
2 files changed, 9 insertions, 18 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index ccce385..8727c92 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -180,28 +180,24 @@ def heatmap_data(traits_names, conn: Any):
"traits_filename": traits_filename
}
-def compute_heatmap_order(
- slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+def compute_traits_order(slink_data, neworder: tuple = tuple()):
"""
- Compute the data used for drawing the heatmap proper from `slink_data`.
+ Compute the order of the traits for clustering from `slink_data`.
This function tries to reproduce the creation and update of the `neworder`
variable in
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
"""
- d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
-
def __order_maker(norder, slnk_dt):
if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
- return norder + (
- (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+ return norder + (slnk_dt[0], slnk_dt[1])
if isinstance(slnk_dt[0], int):
- return norder + ((xoffset + 20, slnk_dt[0]), )
+ return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
if isinstance(slnk_dt[1], int):
- return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+ return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
@@ -222,7 +218,7 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
values = []
rets = []
for order in orders:
- temp_val = traits_data_list[order[1]]
+ temp_val = traits_data_list[order]
for i, strain in enumerate(strainlist):
if temp_val[i] is not None:
strains.append(strain)
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 87f8e45..f1bbefc 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -3,7 +3,7 @@ from unittest import TestCase
from gn3.computations.heatmap import (
cluster_traits,
export_trait_data,
- compute_heatmap_order,
+ compute_traits_order,
retrieve_strains_and_values)
strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
@@ -158,13 +158,8 @@ class TestHeatmap(TestCase):
def test_compute_heatmap_order(self):
"""Test the orders."""
- for xoff, expected in [
- (40, ((60, 9), (60, 4))),
- (30, ((50, 9), (50, 4))),
- (20, ((40, 9), (40, 4)))]:
- with self.subTest(xoffset=xoff):
- self.assertEqual(
- compute_heatmap_order(slinked, xoffset=xoff), expected)
+ self.assertEqual(
+ compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
def test_retrieve_strains_and_values(self):
"""Test retrieval of strains and values."""