diff options
author | Alexander Kabui | 2021-03-16 10:36:58 +0300 |
---|---|---|
committer | GitHub | 2021-03-16 10:36:58 +0300 |
commit | 43d1bb7f6cd2b5890d5b3eb7c357caafda25a35c (patch) | |
tree | 73683272f32cffc860497a93b5c844c272252e67 /tests/unit/computations | |
parent | 995f1dbd081eb64ad177f929615a4edee01cb68f (diff) | |
download | genenetwork3-43d1bb7f6cd2b5890d5b3eb7c357caafda25a35c.tar.gz |
Refactor/clean up correlations (#4)
* initial commit for Refactor/clean-up-correlation
* add python scipy dependency
* initial commit for sample correlation
* initial commit for sample correlation endpoint
* initial commit for integration and unittest
* initial commit for registering correlation blueprint
* add and modify unittest and integration tests for correlation
* Add compute_all_sample_corr method for correlation
* add scipy to requirement txt file
* add tissue correlation for trait list
* add unittest for tissue correlation
* add lit correlation for trait list
* add unittests for lit correlation for trait list
* modify lit correlation for trait list
* add unittests for lit correlation for trait list
* add correlation method in dynamic url
* add file format for expected structure input while doing sample correlation
* modify input data structure -> add trait id
* update tests for sample r correlation
* add compute all lit correlation method
* add endpoint for computing lit_corr
* add unit and integration tests for computing lit corr
* add /api/correlation/tissue_corr/{corr_method} endpoint for tissue correlation
* add unittest and integration tests for tissue correlation
Co-authored-by: BonfaceKilz <bonfacemunyoki@gmail.com>
Diffstat (limited to 'tests/unit/computations')
3 files changed, 705 insertions, 0 deletions
diff --git a/tests/unit/computations/correlation_test_data/target_dataset.json b/tests/unit/computations/correlation_test_data/target_dataset.json new file mode 100644 index 0000000..f6757b6 --- /dev/null +++ b/tests/unit/computations/correlation_test_data/target_dataset.json @@ -0,0 +1,230 @@ +[ + { + "trait_id":"1425637_at", + "trait_sample_data":{ + "BXD1":7.081, + "BXD2":6.912, + "BXD5":7.153, + "BXD6":6.92, + "BXD8":6.886, + "BXD9":7.406, + "BXD11":6.917, + "BXD12":6.914, + "BXD13":6.964, + "BXD15":6.863, + "BXD16":7.06, + "BXD19":7.002, + "BXD20":7.158, + "BXD21":7.039, + "BXD22":7.036, + "BXD23":6.962, + "BXD24":6.946, + "BXD27":7.084, + "BXD28":7.154, + "BXD29":6.932, + "BXD31":6.994, + "BXD32":6.846, + "BXD33":7.078, + "BXD34":6.94, + "BXD38":6.992, + "BXD39":7.048, + "BXD40":7.14, + "BXD42":6.98, + "BXD43":7.072, + "BXD44":7.045, + "BXD45":6.739, + "BXD48":7.07, + "BXD48a":6.998, + "BXD50":7.053, + "BXD51":6.922, + "BXD55":6.782, + "BXD60":7.042, + "BXD61":6.887, + "BXD62":6.86, + "BXD63":6.815, + "BXD64":7.424, + "BXD65":7.216, + "BXD65a":6.934, + "BXD65b":6.893, + "BXD66":6.935, + "BXD67":6.985, + "BXD68":7.044, + "BXD69":6.908, + "BXD70":6.864, + "BXD73":7.074, + "BXD73a":6.986, + "BXD74":6.914, + "BXD75":6.98, + "BXD76":6.772, + "BXD77":7.121, + "BXD79":6.829, + "BXD83":7.018, + "BXD84":6.948, + "BXD85":7.112, + "BXD86":6.858, + "BXD87":6.865, + "BXD89":7.034, + "BXD90":6.901, + "BXD93":6.97, + "BXD94":7.112, + "BXD98":6.954, + "BXD99":6.912, + "C57BL/6J":7.121, + "DBA/2J":6.821, + "B6D2F1":6.998, + "D2B6F1":6.967 + } + }, + { + "trait_id":"1455376_at", + "trait_sample_data":{ + "BXD1":10.929, + "BXD2":11.279, + "BXD5":11.941, + "BXD6":11.407, + "BXD8":12.048, + "BXD9":11.694, + "BXD11":11.534, + "BXD12":11.048, + "BXD13":12.274, + "BXD15":12.077, + "BXD16":11.91, + "BXD19":11.797, + "BXD20":11.67, + "BXD21":12.062, + "BXD22":12.49, + "BXD23":11.957, + "BXD24":11.766, + "BXD27":13.026, + "BXD28":12.184, + "BXD29":11.792, + "BXD31":12.36, + "BXD32":10.608, + 
"BXD33":11.817, + "BXD34":11.213, + "BXD38":11.212, + "BXD39":12.023, + "BXD40":12.892, + "BXD42":11.518, + "BXD43":12.306, + "BXD44":11.932, + "BXD45":10.982, + "BXD48":12.055, + "BXD48a":12.572, + "BXD50":11.696, + "BXD51":11.828, + "BXD55":10.523, + "BXD60":11.403, + "BXD61":11.378, + "BXD62":11.887, + "BXD63":11.776, + "BXD64":12.37, + "BXD65":11.122, + "BXD65a":10.853, + "BXD65b":11.46, + "BXD66":11.546, + "BXD67":12.198, + "BXD68":13.21, + "BXD69":11.581, + "BXD70":12.338, + "BXD73":11.876, + "BXD73a":11.75, + "BXD74":11.898, + "BXD75":11.718, + "BXD76":11.926, + "BXD77":12.326, + "BXD79":12.052, + "BXD83":11.478, + "BXD84":11.494, + "BXD85":11.435, + "BXD86":11.476, + "BXD87":11.456, + "BXD89":11.547, + "BXD90":12.452, + "BXD93":12.921, + "BXD94":11.892, + "BXD98":12.614, + "BXD99":13.142, + "C57BL/6J":12.138, + "DBA/2J":11.394, + "B6D2F1":11.615, + "D2B6F1":11.918 + } + }, + { + "trait_id":"1444351_at", + "trait_sample_data":{ + "BXD1":17.847, + "BXD2":15.262, + "BXD5":18.054, + "BXD6":17.24, + "BXD8":15.735, + "BXD9":17.876, + "BXD11":17.359, + "BXD12":17.906, + "BXD13":16.084, + "BXD15":17.173, + "BXD16":15.941, + "BXD19":17.721, + "BXD20":17.548, + "BXD21":17.242, + "BXD22":17.012, + "BXD23":17.139, + "BXD24":17.904, + "BXD27":17.008, + "BXD28":17.441, + "BXD29":17.606, + "BXD31":17.35, + "BXD32":17.859, + "BXD33":17.453, + "BXD34":15.924, + "BXD38":17.271, + "BXD39":18.034, + "BXD40":17.844, + "BXD42":17.444, + "BXD43":17.676, + "BXD44":17.71, + "BXD45":17.059, + "BXD48":17.334, + "BXD48a":17.398, + "BXD50":17.343, + "BXD51":17.514, + "BXD55":14.995, + "BXD60":18.03, + "BXD61":17.628, + "BXD62":17.431, + "BXD63":16.96, + "BXD64":18.199, + "BXD65":17.593, + "BXD65a":17.49, + "BXD65b":17.268, + "BXD66":16.602, + "BXD67":17.306, + "BXD68":17.167, + "BXD69":17.706, + "BXD70":17.287, + "BXD73":17.412, + "BXD73a":16.224, + "BXD74":16.873, + "BXD75":17.202, + "BXD76":16.934, + "BXD77":17.926, + "BXD79":16.55, + "BXD83":17.042, + "BXD84":17.134, + 
"BXD85":18.021, + "BXD86":17.194, + "BXD87":17.075, + "BXD89":17.511, + "BXD90":17.168, + "BXD93":17.817, + "BXD94":18.04, + "BXD98":16.744, + "BXD99":17.304, + "C57BL/6J":17.084, + "DBA/2J":17.316, + "B6D2F1":16.964, + "D2B6F1":17.086 + } + } +]
\ No newline at end of file diff --git a/tests/unit/computations/correlation_test_data/this_trait_data.json b/tests/unit/computations/correlation_test_data/this_trait_data.json new file mode 100644 index 0000000..7c57fdb --- /dev/null +++ b/tests/unit/computations/correlation_test_data/this_trait_data.json @@ -0,0 +1,76 @@ +{ + "trait_id":"1457784_at", + "trait_sample_data":{ + "BXD1": 6.03, + "BXD2": 6.001, + "BXD5": 6.154, + "BXD6": 6.179, + "BXD8": 6.2, + "BXD9": 6.062, + "BXD11": 6.12, + "BXD12": 6.159, + "BXD13": 6.153, + "BXD15": 6.144, + "BXD16": 6.212, + "BXD19": 6.206, + "BXD20": 6.008, + "BXD21": 6.062, + "BXD22": 6.042, + "BXD23": 6.135, + "BXD24": 6.144, + "BXD27": 6.316, + "BXD28": 6.14, + "BXD29": 6.222, + "BXD31": 6.211, + "BXD32": 5.984, + "BXD33": 6.128, + "BXD34": 6.086, + "BXD38": 6.342, + "BXD39": 6.111, + "BXD40": 6.136, + "BXD42": 6.201, + "BXD43": 5.934, + "BXD44": 6.116, + "BXD45": 6.226, + "BXD48": 6.228, + "BXD48a": 6.16, + "BXD50": 5.92, + "BXD51": 6.227, + "BXD55": 6.137, + "BXD60": 5.932, + "BXD61": 6.18, + "BXD62": 6.188, + "BXD63": 6.134, + "BXD64": 6.102, + "BXD65": 6.258, + "BXD65a": 6.031, + "BXD65b": 6.088, + "BXD66": 6.07, + "BXD67": 6.275, + "BXD68": 6.116, + "BXD69": 6.031, + "BXD70": 6.14, + "BXD73": 6.089, + "BXD73a": 6.195, + "BXD74": 5.971, + "BXD75": 5.972, + "BXD76": 6.125, + "BXD77": 6.107, + "BXD79": 6.288, + "BXD83": 6.119, + "BXD84": 6.102, + "BXD85": 5.959, + "BXD86": 6.249, + "BXD87": 6.172, + "BXD89": 6.13, + "BXD90": 6.162, + "BXD93": 6.19, + "BXD94": 6.068, + "BXD98": 6.137, + "BXD99": 6.252, + "C57BL/6J": 6.255, + "DBA/2J": 6.14, + "B6D2F1": 6.223, + "D2B6F1": 6.038 +} +}
# tests/unit/computations/test_correlation.py (added as a new file by this commit)
"""Unit tests for the functions in gn3.computations.correlations."""
import unittest
from unittest import TestCase
from unittest import mock

from collections import namedtuple

from gn3.computations.correlations import normalize_values
from gn3.computations.correlations import do_bicor
from gn3.computations.correlations import compute_sample_r_correlation
from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import filter_shared_sample_keys
from gn3.computations.correlations import tissue_lit_corr_for_probe_type
from gn3.computations.correlations import tissue_correlation_for_trait_list
from gn3.computations.correlations import lit_correlation_for_trait_list
from gn3.computations.correlations import fetch_lit_correlation_data
from gn3.computations.correlations import query_formatter
from gn3.computations.correlations import map_to_mouse_gene_id
from gn3.computations.correlations import compute_all_lit_correlation
from gn3.computations.correlations import compute_all_tissue_correlation


class QueryableMixin:
    """Interface every fake database object in this module implements."""

    def execute(self, query_options):
        """Run a query; concrete classes must override this."""
        raise NotImplementedError()

    def fetchone(self):
        """Return a single row; concrete classes must override this."""
        raise NotImplementedError()

    def fetchall(self):
        """Return all rows; concrete classes must override this."""
        raise NotImplementedError()


class IllegalOperationError(Exception):
    """Raised when rows are fetched before any query has been executed."""

    def __init__(self):
        super().__init__("Operation not permitted!")


class DataBase(QueryableMixin):
    """In-memory stand-in for a database cursor.

    ``execute`` returns the object itself so calls can be chained as
    ``database.execute(query).fetchone()``.
    """

    def __init__(self):
        self.__query_options = None
        self.__results = None

    def execute(self, query_options):
        """Record the query and make sure a result set is available.

        Rows configured beforehand through ``results_generator`` are kept;
        the default rows are generated only when nothing was configured.
        (Previously the defaults unconditionally replaced any preset rows.)
        """
        self.__query_options = query_options
        if self.__results is None:
            self.results_generator()
        return self

    def fetchone(self):
        """Return the first row, or None when the result set is empty.

        Raises:
            IllegalOperationError: if no result set exists yet, i.e. neither
                ``execute`` nor ``results_generator`` has been called.
        """
        if self.__results is None:
            raise IllegalOperationError()

        # Mirror DB-API cursors: an empty result set yields None instead of
        # letting the indexing below raise IndexError.
        if not self.__results:
            return None

        return self.__results[0]

    def fetchall(self):
        """Return every row of the current result set.

        Raises:
            IllegalOperationError: if no result set exists yet.
        """
        if self.__results is None:
            raise IllegalOperationError()
        return self.__results

    def results_generator(self, expected_results=None):
        """Set the rows later returned by ``fetchone``/``fetchall``.

        Args:
            expected_results: rows to serve.  When None, three ``lit_coeff``
                rows whose ``val`` is 1*0.1, 2*0.1 and 3*0.1 are generated.
        """
        if expected_results is None:
            # Build the row type once instead of once per generated row.
            lit_coeff = namedtuple("lit_coeff", "val")
            self.__results = [lit_coeff(x*0.1) for x in range(1, 4)]
        else:
            self.__results = expected_results


class TestCorrelation(TestCase):
    """Tests for the correlation computation functions."""

    def test_normalize_values(self):
        """Pairs in which either value is None are dropped from both lists."""
        results = normalize_values([2.3, None, None, 3.2, 4.1, 5],
                                   [3.4, 7.2, 1.3, None, 6.2, 4.1])

        expected_results = ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)

        self.assertEqual(results, expected_results)

    def test_bicor(self):
        """Test for doing biweight mid correlation."""
        results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6])

        self.assertEqual(results, ([1, 2, 3], [4, 5, 6]))

    @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value")
    @mock.patch("gn3.computations.correlations.normalize_values")
    def test_compute_sample_r_correlation(self, norm_vals, compute_corr):
        """Sample correlation returns (coefficient, p value, overlap count)
        for the pearson, spearman and bicor methods."""
        primary_values = [2.3, 4.1, 5]
        target_values = [3.4, 6.2, 4.1]

        norm_vals.return_value = ([2.3, 4.1, 5, 4.2, 4, 1.2],
                                  [3.4, 6.2, 4, 1.1, 8, 1.1], 6)
        # One (coefficient, p value) pair per call below, consumed in call
        # order: pearson, spearman, bicor.
        compute_corr.side_effect = [(0.7, 0.3), (-1.0, 0.9), (1, 0.21)]

        pearson_results = compute_sample_r_correlation(
            corr_method="pearson",
            trait_vals=primary_values,
            target_samples_vals=target_values)

        spearman_results = compute_sample_r_correlation(
            corr_method="spearman",
            trait_vals=primary_values,
            target_samples_vals=target_values)

        bicor_results = compute_sample_r_correlation(
            corr_method="bicor",
            trait_vals=primary_values,
            target_samples_vals=target_values)

        self.assertEqual(bicor_results, (1, 0.21, 6))
        self.assertEqual(pearson_results, (0.7, 0.3, 6))
        self.assertEqual(spearman_results, (-1.0, 0.9, 6))

        self.assertIsInstance(
            pearson_results, tuple, "pearson results should be a tuple")
        self.assertIsInstance(
            spearman_results, tuple, "spearman results should be a tuple")

    def test_filter_shared_sample_keys(self):
        """Only values of samples present in both dicts are returned."""
        this_samplelist = {
            "C57BL/6J": "6.638",
            "DBA/2J": "6.266",
            "B6D2F1": "6.494",
            "D2B6F1": "6.565",
            "BXD2": "6.456"
        }

        target_samplelist = {
            "DBA/2J": "1.23",
            "D2B6F1": "6.565",
            "BXD2": "6.456"
        }

        filtered_target_samplelist = ["1.23", "6.565", "6.456"]
        filtered_this_samplelist = ["6.266", "6.565", "6.456"]

        results = filter_shared_sample_keys(
            this_samplelist=this_samplelist,
            target_samplelist=target_samplelist)

        self.assertEqual(results, (filtered_this_samplelist,
                                   filtered_target_samplelist))

    @mock.patch("gn3.computations.correlations.compute_sample_r_correlation")
    @mock.patch("gn3.computations.correlations.filter_shared_sample_keys")
    def test_compute_all_sample(self, filter_shared_samples, sample_r_corr):
        """Given a target dataset, compute the sample r correlation for
        every trait in it."""
        filter_shared_samples.return_value = (
            ["1.23", "6.565", "6.456"], ["6.266", "6.565", "6.456"])
        sample_r_corr.return_value = [-1.0, 0.9, 6]

        this_trait_data = {
            "trait_id": "1455376_at",
            "trait_sample_data": {
                "C57BL/6J": "6.638",
                "DBA/2J": "6.266",
                "B6D2F1": "6.494",
                "D2B6F1": "6.565",
                "BXD2": "6.456"
            }}

        traits_dataset = [
            {
                "trait_id": "1419792_at",
                "trait_sample_data": {
                    "DBA/2J": "1.23",
                    "D2B6F1": "6.565",
                    "BXD2": "6.456"
                }
            }
        ]

        sample_all_results = [{"1419792_at": {"corr_coeffient": -1.0,
                                              "p_value": 0.9,
                                              "num_overlap": 6}}]

        self.assertEqual(
            compute_all_sample_correlation(
                this_trait=this_trait_data, target_dataset=traits_dataset),
            sample_all_results)
        sample_r_corr.assert_called_once_with(
            corr_method="pearson", trait_vals=['1.23', '6.565', '6.456'],
            target_samples_vals=['6.266', '6.565', '6.456'])
        filter_shared_samples.assert_called_once_with(
            this_trait_data.get("trait_sample_data"),
            traits_dataset[0].get("trait_sample_data"))

    @unittest.skip("not implemented")
    def test_tissue_lit_corr_for_probe_type(self):
        """Tissue and lit correlation for a trait list when both the dataset
        and the target dataset are probesets; runs after the initial
        correlation has been done."""
        results = tissue_lit_corr_for_probe_type(
            corr_type="tissue", top_corr_results={})

        self.assertEqual(results, (None, None))

    @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value")
    def test_tissue_correlation_for_trait_list(self, mock_compute_corr_coeff):
        """Given the primary tissue values of a trait and the tissue values
        of a target trait, compute the tissue correlation."""
        primary_tissue_values = [1.1, 1.5, 2.3]
        target_tissues_values = [1, 2, 3]
        mock_compute_corr_coeff.side_effect = [(0.4, 0.9), (-0.2, 0.91)]
        expected_tissue_results = {
            'tissue_corr': 0.4, 'p_value': 0.9, "tissue_number": 3}

        tissue_results = tissue_correlation_for_trait_list(
            primary_tissue_values, target_tissues_values,
            corr_method="pearson",
            compute_corr_p_value=mock_compute_corr_coeff)

        self.assertEqual(tissue_results, expected_tissue_results)

    @mock.patch("gn3.computations.correlations.fetch_lit_correlation_data")
    @mock.patch("gn3.computations.correlations.map_to_mouse_gene_id")
    def test_lit_correlation_for_trait_list(self, mock_mouse_gene_id,
                                            fetch_lit_data):
        """Lit correlation results are attached to every trait of the list."""
        target_trait_lists = [{"gene_id": 15},
                              {"gene_id": 17},
                              {"gene_id": 11}]
        mock_mouse_gene_id.side_effect = [12, 11, 18, 16, 20]

        # Both collaborators that would touch the database are patched, so a
        # bare placeholder object is passed as the database here.
        database_instance = namedtuple("database", "execute")("fetchone")

        fetch_lit_data.side_effect = [(15, 9), (17, 8), (11, 12)]

        lit_results = lit_correlation_for_trait_list(
            database=database_instance, target_trait_lists=target_trait_lists,
            species="rat", trait_gene_id="12")

        expected_results = [{"gene_id": 15, "lit_corr": 9},
                            {"gene_id": 17, "lit_corr": 8},
                            {"gene_id": 11, "lit_corr": 12}]

        self.assertEqual(lit_results, expected_results)

    def test_fetch_lit_correlation_data(self):
        """Fetching lit correlation data when both the input mouse gene id
        and the mouse gene id are None."""
        database_instance = DataBase()
        results = fetch_lit_correlation_data(database=database_instance,
                                             gene_id="1",
                                             input_mouse_gene_id=None,
                                             mouse_gene_id=None)

        self.assertEqual(results, ("1", 0))

    def test_fetch_lit_correlation_data_db_query(self):
        """Fetching the lit correlation coefficient given the input trait
        mouse gene id and the mouse gene id."""
        database_instance = DataBase()
        expected_results = ("1", 0.1)

        lit_results = fetch_lit_correlation_data(database=database_instance,
                                                 gene_id="1",
                                                 input_mouse_gene_id="20",
                                                 mouse_gene_id="15")

        self.assertEqual(expected_results, lit_results)

    def test_query_lit_correlation_for_db_empty(self):
        """The coefficient returned is 0 when the database has no value."""
        database_instance = mock.Mock()
        database_instance.execute.return_value.fetchone.return_value = None

        lit_results = fetch_lit_correlation_data(database=database_instance,
                                                 input_mouse_gene_id="12",
                                                 gene_id="16",
                                                 mouse_gene_id="12")

        self.assertEqual(lit_results, ("16", 0))

    def test_query_formatter(self):
        """Formatting a query given the query string and its values."""
        # The query and the expected string share one layout, so only the
        # substituted placeholders can make them differ.
        query = """
        SELECT VALUE
        FROM LCorr
        WHERE GeneId1='%s' and
        GeneId2='%s'
        """

        expected_formatted_query = """
        SELECT VALUE
        FROM LCorr
        WHERE GeneId1='20' and
        GeneId2='15'
        """

        mouse_gene_id = "20"
        input_mouse_gene_id = "15"

        query_values = (mouse_gene_id, input_mouse_gene_id)

        formatted_query = query_formatter(query, *query_values)

        self.assertEqual(formatted_query, expected_formatted_query)

    def test_query_formatter_no_query_values(self):
        """A query without placeholders is returned unchanged."""
        query = """SELECT * FROM USERS"""
        formatted_query = query_formatter(query)

        self.assertEqual(formatted_query, query)

    def test_map_to_mouse_gene_id(self):
        """Converting a gene id to a mouse gene id for several species."""
        database_instance = mock.Mock()
        test_data = [("Human", 14), (None, 9), ("Mouse", 15), ("Rat", 14)]

        # Every database lookup consumes the next row: 12, 13, 14, ...
        database_results = [namedtuple("mouse_id", "mouse")(val)
                            for val in range(12, 20)]
        results = []

        database_instance.execute.return_value.fetchone.side_effect = \
            database_results
        expected_results = [12, None, 13, 14]
        for (species, gene_id) in test_data:
            mouse_gene_id_results = map_to_mouse_gene_id(
                database=database_instance, species=species, gene_id=gene_id)
            results.append(mouse_gene_id_results)

        self.assertEqual(results, expected_results)

    @mock.patch("gn3.computations.correlations.lit_correlation_for_trait_list")
    def test_compute_all_lit_correlation(self, mock_lit_corr):
        """compute_all_lit_correlation wraps lit_correlation_for_trait_list
        and is used by the api/correlation/lit endpoint."""
        database = mock.Mock()

        expected_mocked_lit_results = [{"gene_id": 11, "lit_corr": 9},
                                       {"gene_id": 17, "lit_corr": 8}]

        # NOTE: side_effect hands out one list item per call, so the first
        # call to the patched function returns only the first dict.
        mock_lit_corr.side_effect = expected_mocked_lit_results

        lit_correlation_results = compute_all_lit_correlation(
            database_instance=database, trait_lists=[{"gene_id": 11}],
            species="rat", gene_id=12)

        expected_results = {
            "lit_results": {"gene_id": 11, "lit_corr": 9}
        }

        self.assertEqual(lit_correlation_results, expected_results)

    @mock.patch(
        "gn3.computations.correlations.tissue_correlation_for_trait_list")
    def test_compute_all_tissue_correlation(self, mock_tissue_corr):
        """compute_all_tissue_correlation calls
        tissue_correlation_for_trait_list once per target trait and keys the
        results by trait id."""
        primary_tissue_dict = {"trait_id": "1419792_at",
                               "tissue_values": [1, 2, 3, 4, 5]}

        target_tissue_dict = [
            {"trait_id": "1418702_a_at", "tissue_values": [1, 2, 3]},
            {"trait_id": "1412_at", "tissue_values": [1, 2, 3]}]

        mock_tissue_corr.side_effect = [
            {"tissue_corr": -0.5, "p_value": 0.9, "tissue_number": 3},
            {"tissue_corr": 1.11, "p_value": 0.2, "tissue_number": 3}]

        expected_results = {
            "1418702_a_at":
            {"tissue_corr": -0.5, "p_value": 0.9, "tissue_number": 3},
            "1412_at":
            {"tissue_corr": 1.11, "p_value": 0.2, "tissue_number": 3}}

        results = compute_all_tissue_correlation(
            primary_tissue_dict=primary_tissue_dict,
            target_tissues_dict_list=target_tissue_dict,
            corr_method="pearson")

        self.assertEqual(mock_tissue_corr.call_count, 2)

        self.assertEqual(results, expected_results)