diff options
Diffstat (limited to 'tests/unit/computations/test_datasets.py')
-rw-r--r-- | tests/unit/computations/test_datasets.py | 219 |
1 files changed, 219 insertions, 0 deletions
diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py new file mode 100644 index 0000000..f9e9c2b --- /dev/null +++ b/tests/unit/computations/test_datasets.py @@ -0,0 +1,219 @@ +"""Module contains tests from datasets""" +import json + +from unittest import TestCase +from unittest import mock + +from collections import namedtuple + +from gn3.computations.datasets import retrieve_trait_sample_data +from gn3.computations.datasets import get_query_for_dataset_sample +from gn3.computations.datasets import fetch_from_db_sample_data +from gn3.computations.datasets import create_dataset +from gn3.computations.datasets import dataset_creator_store +from gn3.computations.datasets import dataset_type_getter +from gn3.computations.datasets import fetch_dataset_type_from_gn2_api +from gn3.computations.datasets import fetch_dataset_sample_id +from gn3.computations.datasets import divide_into_chunks +from gn3.computations.datasets import get_traits_data + + +class TestDatasets(TestCase): + """Class contains tests for datasets""" + + @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data") + def test_retrieve_trait_sample_data(self, mock_fetch_sample_results): + """Test retrieving sample data\ + for trait from the dataset + """ + trait_name = "1419792_at" + dataset_id = "HC_M2_0606_P&" + dataset_type = "Publish" + + database = mock.Mock() + + dataset = { + "id": dataset_id, + "type": dataset_type, + "name": dataset_id + } + + fetch_results = [('BXD32', 8.001, None, None, 'BXD32')] + + mock_fetch_sample_results.return_value = fetch_results + + results = retrieve_trait_sample_data( + dataset, trait_name, database) + self.assertEqual(mock_fetch_sample_results.call_count, 1) + self.assertEqual(results, fetch_results) + + def test_query_for_dataset_sample(self): + """Test for getting query for sample data""" + + no_results = get_query_for_dataset_sample("does not exists") + + query_exists = get_query_for_dataset_sample("Publish") + + self.assertEqual(no_results, None) + self.assertIsInstance(query_exists, str) + + def test_fetch_from_db_sample_data(self): + """Test for function that fetches sample\ + results from the database + """ + + database_results = [('BXD31', 8.001, None, None, 'BXD31'), + ('BXD32', 7.884, None, None, 'BXD32'), + ('BXD42', 7.682, None, None, 'BXD42'), + ('BXD42', 7.682, None, None, 'BXD42'), + ('BXD40', 7.945, None, None, 'BXD40'), + ('BXD43', 7.873, None, None, 'BXD43') + ] + + database = mock.Mock() + db_cursor = mock.Mock() + db_cursor.execute.return_value = 6 + db_cursor.fetchall.return_value = database_results + database.cursor.return_value = db_cursor + + mock_pheno_query = """ + SELECT + Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2 + WHERE + PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = 1419792_at AND + PublishFreeze.Id = '12' AND PublishData.StrainId = Strain.Id + Order BY + Strain.Name + """ + fetch_results = fetch_from_db_sample_data(mock_pheno_query, database) + + self.assertEqual(fetch_results, database_results) + + @mock.patch("gn3.computations.datasets.dataset_creator_store") + @mock.patch("gn3.computations.datasets.dataset_type_getter") + def test_create_dataset(self, mock_dataset_type, mock_store): + """Test function that creates/fetches required dataset\ + can either be published phenotype,genotype,Microarray or\ + user defined ->Temp + """ + probe_name = "HC_M2_0606_P" + probe_type = "ProbeSet" + + mock_dataset_creator = namedtuple( + 'ProbeSet', ["dataset_name", "dataset_type"]) + + mock_store.return_value = mock_dataset_creator + mock_dataset_type.return_value = probe_type + dataset = create_dataset( + dataset_type=None, dataset_name=probe_name) + + self.assertEqual(dataset.dataset_name, probe_name) + self.assertEqual(dataset.dataset_type, probe_type) + + def test_dataset_creator_store(self): + """Test for functions that actual + function to create differerent \ + datasets + """ + results = dataset_creator_store("ProbeSet") + + self.assertTrue(results) + + def test_dataset_type_getter(self): + """Test for fetching type of dataset given\ + the dataset name + """ + + redis_instance = mock.Mock() + # fetched in redis + redis_instance.get.return_value = "ProbeSet" + results = dataset_type_getter("HC_M2_0_P", redis_instance) + self.assertEqual(results, "ProbeSet") + + @mock.patch("gn3.computations.datasets.requests") + def test_fetch_dataset_type_from_gn2_api(self, mock_request): + """Test for function that test fetching\ + all datasets from gn2 api in order to store\ + in redis + """ + + expected_json_results = {"datasets": { + "arabidopsis": { + "BayXSha": { + "Genotypes": [ + [ + "None", + "BayXShaGeno", + "BayXSha Genotypes" + ] + ], + "Phenotypes": [ + [ + "642", + "BayXShaPublish", + "BayXSha Published Phenotypes" + ] + ] + } + } + }} + + request_results = json.dumps(expected_json_results) + mock_request.get.return_value.content = request_results + results = fetch_dataset_type_from_gn2_api("HC_M2_0_P") + expected_results = { + "BayXShaGeno": "Geno", + "642": "Publish" + } + + self.assertEqual(expected_results, results) + + def test_fetch_dataset_sample_id(self): + """Get from the database the sample\ + id if only in the samplelists + """ + + expected_results = {"B6D2F1": 1, "BXD1": 4, "BXD11": 10, + "BXD12": 11, "BXD13": 12, "BXD15": 14, "BXD16": 15} + + database_instance = mock.Mock() + database_cursor = mock.Mock() + + database_cursor.execute.return_value = 5 + database_cursor.fetchall.return_value = list(expected_results.items()) + database_instance.cursor.return_value = database_cursor + strain_list = ["B6D2F1", "BXD1", "BXD11", + "BXD12", "BXD13", "BXD16", "BXD15"] + + results = fetch_dataset_sample_id( + samplelist=strain_list, database=database_instance, species="mouse") + + self.assertEqual(results, expected_results) + + @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data") + @mock.patch("gn3.computations.datasets.divide_into_chunks") + def test_get_traits_data(self, mock_divide_into_chunks, mock_fetch_samples): + """Test for for function to get data\ + of traits in dataset + """ + _expected_results = {'AT_DSAFDS': [ + 12, 14, 13, 23, 12, 14, 13, 23, 12, 14, 13, 23]} + database = mock.Mock() + sample_id = [1, 2, 7, 3, 22, 8] + mock_divide_into_chunks.return_value = [ + [1, 2, 7], [3, 22, 8], [5, 22, 333]] + mock_fetch_samples.return_value = ("AT_DSAFDS", 12, 14, 13, 23) + results = get_traits_data(sample_id, database, "HC_M2", "Publish") + + self.assertEqual({}, dict(results)) + + def test_divide_into_chunks(self): + """Test for dividing a list into given number of\ + chunks for example + """ + results = divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) + + expected_results = [[1, 2, 7], [3, 22, 8], [5, 22, 333]] + + self.assertEqual(results, expected_results) |