From fc233942e9118a341001f3357a5d2b1c65187736 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Tue, 17 Aug 2021 08:47:11 +0300 Subject: Add tests and fix errors caught with tests Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/computations/heatmap.py: fix errors * tests/unit/computations/test_heatmap.py: new tests Add new tests with the expected source data format, and expected results. Fix all errors that were caught by running the tests --- gn3/computations/heatmap.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py index a0e778a..8a86fe8 100644 --- a/gn3/computations/heatmap.py +++ b/gn3/computations/heatmap.py @@ -34,11 +34,11 @@ def export_trait_data( """ def __export_all_types(tdata, strain): sample_data = [] - if tdata[strain]["val"]: - sample_data.append(tdata[strain]["val"]) + if tdata[strain]["value"]: + sample_data.append(tdata[strain]["value"]) if var_exists: - if tdata[strain].var: - sample_data.append(tdata[strain]["var"]) + if tdata[strain]["variance"]: + sample_data.append(tdata[strain]["variance"]) else: sample_data.append(None) if n_exists: @@ -58,15 +58,15 @@ def export_trait_data( def __exporter(accumulator, strain): # pylint: disable=[R0911] - if trait_data.has_key(strain): + if strain in trait_data["data"]: if dtype == "val": - return accumulator + (trait_data[strain]["val"], ) + return accumulator + (trait_data["data"][strain]["value"], ) if dtype == "var": - return accumulator + (trait_data[strain]["var"], ) + return accumulator + (trait_data["data"][strain]["variance"], ) if dtype == "N": - return trait_data[strain]["ndata"] + return accumulator + (trait_data["data"][strain]["ndata"], ) if dtype == "all": - return accumulator + __export_all_types(trait_data, strain) + return accumulator + __export_all_types(trait_data["data"], strain) raise KeyError("Type `%s` is incorrect" % dtype) if var_exists and n_exists: return accumulator + (None, None, None) -- cgit v1.2.3 From d10ee60d2200eefb29a22b0a84cd19569235b354 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Tue, 17 Aug 2021 11:05:03 +0300 Subject: Make child sequence a list Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Since the `slink` function assigns values to the `listcopy` variable and its children, this commit ensures that the sequence is a list to allow for the assignment. If the child-sequence is a tuple, that would lead to an exception. --- gn3/computations/slink.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3/computations') diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py index 5953e6b..3d7a576 100644 --- a/gn3/computations/slink.py +++ b/gn3/computations/slink.py @@ -161,7 +161,7 @@ def slink(lists): try: size = len(lists) listindexcopy = list(range(size)) - listscopy = [child[:] for child in lists] + listscopy = [list(child[:]) for child in lists] init_size = size candidate = [] while init_size > 2: -- cgit v1.2.3 From d54e2c4b48b24ebbccc8b2ae183fffd645e21344 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Tue, 17 Aug 2021 11:42:20 +0300 Subject: Fix obvious linting errors * Fix linting errors that do not change the function of the code. --- gn3/api/correlation.py | 4 ++-- gn3/api/general.py | 3 ++- gn3/computations/correlations.py | 4 ++-- wsgi.py | 6 ++++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index a3e366e..46121f8 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -79,7 +79,7 @@ def compute_tissue_corr(corr_method="pearson"): target_tissues_dict = tissue_input_data["target_tissues_dict"] results = compute_tissue_correlation(primary_tissue_dict=primary_tissue_dict, - target_tissues_data=target_tissues_dict, - corr_method=corr_method) + target_tissues_data=target_tissues_dict, + corr_method=corr_method) return jsonify(results) diff --git a/gn3/api/general.py b/gn3/api/general.py index 86fb7b7..69ec343 100644 --- a/gn3/api/general.py +++ b/gn3/api/general.py @@ -13,7 +13,8 @@ general = Blueprint("general", __name__) @general.route("/version") def version(): - return jsonify("1.0") + """Get API version.""" + return jsonify("1.0") @general.route("/metadata/upload/", methods=["POST"], strict_slashes=False) diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 1fd3213..8d76c09 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -341,8 +341,8 @@ def compute_all_lit_correlation(conn, trait_lists: List, def compute_tissue_correlation(primary_tissue_dict: dict, - target_tissues_data: dict, - corr_method: str): + target_tissues_data: dict, + corr_method: str): """Function acts as an abstraction for tissue_correlation_for_trait\ required input are target tissue object and primary tissue trait\ target tissues data contains the trait_symbol_dict and symbol_tissue_vals diff --git a/wsgi.py b/wsgi.py index d30bc49..0fcb573 100644 --- a/wsgi.py +++ b/wsgi.py @@ -1,9 +1,11 @@ +""" +WSGI application entry-point. +""" # import main +from gn3.app import create_app print("STARTING WSGI APP") -from gn3.app import create_app - app = create_app() if __name__ == "__main__": -- cgit v1.2.3 From f8be3a85567cc17d50a01382eb10cb3b05436214 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Tue, 17 Aug 2021 11:43:32 +0300 Subject: Test the clustering Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/computations/heatmap.py: Fix clustering bugs * tests/unit/computations/test_heatmap.py: Add new tests. Fix linting issues. Test and fix the clustering function. --- gn3/computations/heatmap.py | 14 ++-- tests/unit/computations/test_heatmap.py | 109 +++++++++++++++++++++++++++++--- 2 files changed, 106 insertions(+), 17 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py index 8a86fe8..3c35029 100644 --- a/gn3/computations/heatmap.py +++ b/gn3/computations/heatmap.py @@ -110,13 +110,13 @@ def cluster_traits(traits_data_list: Sequence[Dict]): https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162 """ def __compute_corr(tdata_i, tdata_j): - if tdata_j[0] < tdata_i[0]: - corr_vals = compute_correlation(tdata_i, tdata_j) - corr = corr_vals[0] - if (1 - corr) < 0: - return 0.0 - return 1 - corr - return 0.0 + if tdata_i[0] == tdata_j[0]: + return 0.0 + corr_vals = compute_correlation(tdata_i[1], tdata_j[1]) + corr = corr_vals[0] + if (1 - corr) < 0: + return 0.0 + return 1 - corr def __cluster(tdata_i): return tuple( diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py index 78303ae..650cb45 100644 --- a/tests/unit/computations/test_heatmap.py +++ b/tests/unit/computations/test_heatmap.py @@ -1,9 +1,38 @@ """Module contains tests for gn3.computations.heatmap""" from unittest import TestCase -from gn3.computations.heatmap import export_trait_data +from gn3.computations.heatmap import cluster_traits, export_trait_data strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] -trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} +trait_data = { + "mysqlid": 36688172, + "data": { + "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, + "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, + "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, + "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, + "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, + "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, + "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, + "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, + "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, + "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, + "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, + "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, + "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, + "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, + "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, + "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, + "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, + "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, + "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, + "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, + "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, + "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, + "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, + "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, + "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, + "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, + "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} class TestHeatmap(TestCase): """Class for testing heatmap computation functions""" @@ -29,10 +58,14 @@ class TestHeatmap(TestCase): argument and the different flags set up """ for dtype, vflag, nflag, expected in [ - ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], ["var", False, False, (None, None, None, None, None, None)], ["var", False, True, (None, None, None, None, None, None)], ["var", True, False, (None, None, None, None, None, None)], @@ -41,10 +74,17 @@ class TestHeatmap(TestCase): ["N", False, True, (None, None, None, None, None, None)], ["N", True, False, (None, None, None, None, None, None)], ["N", True, True, (None, None, None, None, None, None)], - ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)], - ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)], - ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] + ["all", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["all", False, True, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, False, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, True, + (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, + 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] ]: with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): self.assertEqual( @@ -52,3 +92,52 @@ class TestHeatmap(TestCase): trait_data, strainlist, dtype=dtype, var_exists=vflag, n_exists=nflag), expected) + + def test_cluster_traits(self): + """ + Test that the clustering is working as expected. + """ + traits_data_list = [ + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944), + (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398), + (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077), + (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731), + (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505), + (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815), + (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544), + (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991), + (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615), + (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)] + self.assertEqual( + cluster_traits(traits_data_list), + ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245, + 1.5025235756329178, 0.6952839500255574, 1.271661230252733, + 0.2100487290977544, 1.4699690641062024, 0.7934461515867415), + (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204, + 1.4815755944537086, 0.26087293140686374, 1.6939790104301427, + 0.06024619831474998, 1.7430082449189215, 0.4497104244247795), + (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234, + 1.0396738891139845, 0.5278328671176757, 1.6275069061182947, + 0.2636503792482082, 1.739617877037615, 0.7127042590637039), + (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0, + 0.9936846292920328, 1.1169999189889366, 0.6007483980555253, + 1.430209221053372, 0.25879514152086425, 0.9313185954797953), + (1.5025235756329178, 1.4815755944537086, 1.0396738891139845, + 0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244, + 1.4122477962364253, 0.8968250491499363, 1.1683723389247052), + (0.6952839500255574, 0.26087293140686374, 0.5278328671176757, + 1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269, + 0.19179284676938602, 1.4875072385631605, 0.23451785425383564), + (1.271661230252733, 1.6939790104301427, 1.6275069061182947, + 0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0, + 1.6540234785929928, 0.2140799896286565, 1.7413442197913358), + (0.2100487290977544, 0.06024619831474998, 0.2636503792482082, + 1.430209221053372, 1.4122477962364253, 0.19179284676938602, + 1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485), + (1.4699690641062024, 1.7430082449189215, 1.739617877037615, + 0.25879514152086425, 0.8968250491499363, 1.4875072385631605, + 0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216), + (0.7934461515867415, 0.4497104244247795, 0.7127042590637039, + 0.9313185954797953, 1.1683723389247052, 0.23451785425383564, + 1.7413442197913358, 0.33370067057028485, 1.3256191648260216, + 0.0))) -- cgit v1.2.3 From c9ee473ff7797f6bbd7507eb55c772a3a646acee Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 20 Aug 2021 09:04:12 +0300 Subject: Minor correlation fixes (#36) * fix key error for (*tissue_cor) tissue correlation * update tests for tissue correlation * rename speed_compute to fast_compute * pep8 formatting--- gn3/computations/correlations.py | 15 +++++++-------- tests/unit/computations/test_correlation.py | 8 ++++---- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 8d76c09..bb13ff1 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -124,9 +124,9 @@ def filter_shared_sample_keys(this_samplelist, return (this_vals, target_vals) -def speed_compute_all_sample_correlation(this_trait, - target_dataset, - corr_method="pearson") -> List: +def fast_compute_all_sample_correlation(this_trait, + target_dataset, + corr_method="pearson") -> List: """Given a trait data sample-list and target__datasets compute all sample correlation this functions uses multiprocessing if not use the normal fun @@ -362,8 +362,7 @@ def compute_tissue_correlation(primary_tissue_dict: dict, target_tissues_values=target_tissue_vals, trait_id=trait_id, corr_method=corr_method) - tissue_result_dict = {trait_id: tissue_result} - tissues_results.append(tissue_result_dict) + tissues_results.append(tissue_result) return sorted( tissues_results, key=lambda trait_name: -abs(list(trait_name.values())[0]["tissue_corr"])) @@ -386,9 +385,9 @@ def process_trait_symbol_dict(trait_symbol_dict, symbol_tissue_vals_dict) -> Lis return traits_tissue_vals -def speed_compute_tissue_correlation(primary_tissue_dict: dict, - target_tissues_data: dict, - corr_method: str): +def fast_compute_tissue_correlation(primary_tissue_dict: dict, + target_tissues_data: dict, + corr_method: str): """Experimental function that uses multiprocessing for computing tissue correlation diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index f2d65bd..fc52ec1 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -406,10 +406,10 @@ class TestCorrelation(TestCase): target_tissue_data = {"trait_symbol_dict": target_trait_symbol, "symbol_tissue_vals_dict": target_symbol_tissue_vals} - mock_tissue_corr.side_effect = [{"tissue_corr": -0.5, "tissue_p_val": 0.9, - "tissue_number": 3}, - {"tissue_corr": 1.11, "tissue_p_val": 0.2, - "tissue_number": 3}] + mock_tissue_corr.side_effect = [{"1418702_a_at": {"tissue_corr": -0.5, "tissue_p_val": 0.9, + "tissue_number": 3}}, + {"1412_at": {"tissue_corr": 1.11, "tissue_p_val": 0.2, + "tissue_number": 3}}] expected_results = [{"1412_at": {"tissue_corr": 1.11, "tissue_p_val": 0.2, "tissue_number": 3}}, -- cgit v1.2.3