diff options
author | zsloan | 2021-10-12 18:56:34 +0000 |
---|---|---|
committer | zsloan | 2021-10-12 18:56:34 +0000 |
commit | 0f396f4a1a753d449cf2975fc425d587d9350689 (patch) | |
tree | c9dac243dc05e5cb90ccb7f1d96fd599986bf60a /tests/unit | |
parent | 976660ce9ef915426c7ce5ff9077b439e4102a2c (diff) | |
parent | 77c274b79c3ec01de60e90db3299763cb58f715b (diff) | |
download | genenetwork3-0f396f4a1a753d449cf2975fc425d587d9350689.tar.gz |
Merge branch 'main' of https://github.com/genenetwork/genenetwork3 into feature/add_rqtl_pairscan
Diffstat (limited to 'tests/unit')
-rw-r--r-- | tests/unit/computations/data/qtlreaper/main_output_sample.txt | 11 | ||||
-rw-r--r-- | tests/unit/computations/data/qtlreaper/permu_output_sample.txt | 27 | ||||
-rw-r--r-- | tests/unit/computations/test_parsers.py | 4 | ||||
-rw-r--r-- | tests/unit/computations/test_qtlreaper.py | 135 | ||||
-rw-r--r-- | tests/unit/computations/test_wgcna.py | 160 | ||||
-rw-r--r-- | tests/unit/db/data/genotypes/genotype_sample1.geno | 23 | ||||
-rw-r--r-- | tests/unit/db/test_datasets.py | 42 | ||||
-rw-r--r-- | tests/unit/db/test_genotypes.py | 170 | ||||
-rw-r--r-- | tests/unit/db/test_traits.py | 16 | ||||
-rw-r--r-- | tests/unit/sample_test_data.py | 111 | ||||
-rw-r--r-- | tests/unit/test_heatmaps.py (renamed from tests/unit/computations/test_heatmap.py) | 152 |
11 files changed, 790 insertions, 61 deletions
diff --git a/tests/unit/computations/data/qtlreaper/main_output_sample.txt b/tests/unit/computations/data/qtlreaper/main_output_sample.txt new file mode 100644 index 0000000..12b11b4 --- /dev/null +++ b/tests/unit/computations/data/qtlreaper/main_output_sample.txt @@ -0,0 +1,11 @@ +ID Locus Chr cM Mb LRS Additive pValue +T1 rs31443144 1 1.500 3.010 0.500 -0.074 1.000 +T1 rs6269442 1 1.500 3.492 0.500 -0.074 1.000 +T1 rs32285189 1 1.630 3.511 0.500 -0.074 1.000 +T1 rs258367496 1 1.630 3.660 0.500 -0.074 1.000 +T1 rs32430919 1 1.750 3.777 0.500 -0.074 1.000 +T1 rs36251697 1 1.880 3.812 0.500 -0.074 1.000 +T1 rs30658298 1 2.010 4.431 0.500 -0.074 1.000 +T1 rs51852623 1 2.010 4.447 0.500 -0.074 1.000 +T1 rs31879829 1 2.140 4.519 0.500 -0.074 1.000 +T1 rs36742481 1 2.140 4.776 0.500 -0.074 1.000 diff --git a/tests/unit/computations/data/qtlreaper/permu_output_sample.txt b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt new file mode 100644 index 0000000..64cff07 --- /dev/null +++ b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt @@ -0,0 +1,27 @@ +4.44174 +5.03825 +5.08167 +5.18119 +5.18578 +5.24563 +5.24619 +5.24619 +5.27961 +5.28228 +5.43903 +5.50188 +5.51694 +5.56830 +5.63874 +5.71346 +5.71936 +5.74275 +5.76764 +5.79815 +5.81671 +5.82775 +5.89659 +5.92117 +5.93396 +5.93396 +5.94957 diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py index 19c3067..b51b0bf 100644 --- a/tests/unit/computations/test_parsers.py +++ b/tests/unit/computations/test_parsers.py @@ -15,7 +15,7 @@ class TestParsers(unittest.TestCase): def test_parse_genofile_with_existing_file(self): """Test that a genotype file is parsed correctly""" - strains = ["bxd1", "bxd2"] + samples = ["bxd1", "bxd2"] genotypes = [ {"chr": "1", "locus": "rs31443144", "cm": "1.50", "mb": "3.010274", @@ -51,4 +51,4 @@ class TestParsers(unittest.TestCase): "../test_data/genotype.txt" )) self.assertEqual(parse_genofile( - test_genotype_file), (strains, genotypes)) + test_genotype_file), (samples, genotypes)) diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py new file mode 100644 index 0000000..742d106 --- /dev/null +++ b/tests/unit/computations/test_qtlreaper.py @@ -0,0 +1,135 @@ +"""Module contains tests for gn3.computations.qtlreaper""" +from unittest import TestCase +from gn3.computations.qtlreaper import ( + parse_reaper_main_results, + organise_reaper_main_results, + parse_reaper_permutation_results) +from tests.unit.sample_test_data import organised_trait_1 + +class TestQTLReaper(TestCase): + """Class for testing qtlreaper interface functions.""" + + def test_parse_reaper_main_results(self): + """Test that the main results file is parsed correctly.""" + self.assertEqual( + parse_reaper_main_results( + "tests/unit/computations/data/qtlreaper/main_output_sample.txt"), + [ + { + "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500, + "Mb": 3.010, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500, + "Mb": 3.492, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630, + "Mb": 3.511, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630, + "Mb": 3.660, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750, + "Mb": 3.777, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880, + "Mb": 3.812, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010, + "Mb": 4.431, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010, + "Mb": 4.447, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140, + "Mb": 4.519, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140, + "Mb": 4.776, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + } + ]) + + def test_parse_reaper_permutation_results(self): + """Test that the permutations results file is parsed correctly.""" + self.assertEqual( + parse_reaper_permutation_results( + "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"), + [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619, + 5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830, + 5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671, + 5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957]) + + def test_organise_reaper_main_results(self): + """Check that results are organised correctly.""" + self.assertEqual( + organise_reaper_main_results([ + { + "ID": "1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500, + "Mb": 3.010, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500, + "Mb": 3.492, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630, + "Mb": 3.511, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630, + "Mb": 3.660, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750, + "Mb": 3.777, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880, + "Mb": 3.812, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010, + "Mb": 4.431, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010, + "Mb": 4.447, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140, + "Mb": 4.519, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + }, + { + "ID": "1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140, + "Mb": 4.776, "LRS": 0.500, "Additive": -0.074, + "pValue": 1.000 + } + ]), + organised_trait_1) diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py new file mode 100644 index 0000000..ec81d94 --- /dev/null +++ b/tests/unit/computations/test_wgcna.py @@ -0,0 +1,160 @@ +"""module contains python code for wgcna""" +from unittest import TestCase +from unittest import mock + +from gn3.computations.wgcna import dump_wgcna_data +from gn3.computations.wgcna import compose_wgcna_cmd +from gn3.computations.wgcna import call_wgcna_script + + +class TestWgcna(TestCase): + """test class for wgcna""" + + @mock.patch("gn3.computations.wgcna.run_cmd") + @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd") + @mock.patch("gn3.computations.wgcna.dump_wgcna_data") + def test_call_wgcna_script(self, + mock_dumping_data, + mock_compose_wgcna, + mock_run_cmd): + """test for calling wgcna script""" + + # pylint: disable = line-too-long + mock_dumping_data.return_value = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + mock_compose_wgcna.return_value = "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + request_data = { + "trait_names": ["1455537_at", "1425637_at", "1449593_at", "1421945_a_at", "1450423_s_at", "1423841_at", "1451144_at"], + "trait_sample_data": [ + { + "129S1/SvImJ": 7.142, + "A/J": 7.31, + "AKR/J": 7.49, + "B6D2F1": 6.899, + "BALB/cByJ": 7.172, + "BALB/cJ": 7.396 + }, + { + "129S1/SvImJ": 7.071, + "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, + "BALB/cByJ": 7.293, + "BALB/cJ": 7.117 + }]} + + mock_run_cmd_results = { + + "code": 0, + "output": "Flagging genes and samples with too many missing values...\n ..step 1\nAllowing parallel execution with up to 3 working processes.\npickSoftThreshold: will use block size 7.\n pickSoftThreshold: calculating connectivity for given powers...\n ..working on genes 1 through 7 of 7\n Flagging genes and samples with too many missing values...\n ..step 1\n ..Working on block 1 .\n TOM calculation: adjacency..\n ..will not use multithreading.\nclustering..\n ....detecting modules..\n ....calculating module eigengenes..\n ....checking kME in modules..\n ..merging modules that are too close..\n mergeCloseModules: Merging modules whose distance is less than 0.15\n mergeCloseModules: less than two proper modules.\n ..color levels are turquoise\n ..there is nothing to merge.\n Calculating new MEs...\n" + } + + json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"outputdata\":{\"eigengenes\":[],\"colors\":[]}}" + + expected_output = { + + "data": { + "inputdata": { + "trait_sample_data ": {}, + "minModuleSize": 30, + "TOMtype": "unsigned" + }, + + "outputdata": { + "eigengenes": [], + "colors": [] + } + }, + + **mock_run_cmd_results + + } + + with mock.patch("builtins.open", mock.mock_open(read_data=json_output)): + + mock_run_cmd.return_value = mock_run_cmd_results + + results = call_wgcna_script( + "Rscript/GUIX_PATH/scripts/r_file.R", request_data) + + mock_dumping_data.assert_called_once_with(request_data) + + mock_compose_wgcna.assert_called_once_with( + "Rscript/GUIX_PATH/scripts/r_file.R", + "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json") + + mock_run_cmd.assert_called_once_with( + "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json") + + self.assertEqual(results, expected_output) + + @mock.patch("gn3.computations.wgcna.run_cmd") + @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd") + @mock.patch("gn3.computations.wgcna.dump_wgcna_data") + def test_call_wgcna_script_fails(self, mock_dumping_data, mock_compose_wgcna, mock_run_cmd): + """test for calling wgcna script\ + fails and generates the expected error""" + # pylint: disable = line-too-long, + mock_dumping_data.return_value = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + mock_compose_wgcna.return_value = "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + expected_error = { + "code": 2, + "output": "could not read the json file" + } + + with mock.patch("builtins.open", mock.mock_open(read_data="")): + + mock_run_cmd.return_value = expected_error + self.assertEqual(call_wgcna_script( + "input_file.R", ""), expected_error) + + def test_compose_wgcna_cmd(self): + """test for composing wgcna cmd""" + wgcna_cmd = compose_wgcna_cmd( + "wgcna.r", "/tmp/wgcna.json") + self.assertEqual( + wgcna_cmd, "Rscript ./scripts/wgcna.r /tmp/wgcna.json") + + @mock.patch("gn3.computations.wgcna.TMPDIR", "/tmp") + @mock.patch("gn3.computations.wgcna.uuid.uuid4") + def test_create_json_file(self, file_name_generator): + """test for writing the data to a csv file""" + # # All the traits we have data for (should not contain duplicates) + # All the strains we have data for (contains duplicates) + + trait_sample_data = {"1425642_at": {"129S1/SvImJ": 7.142, + "A/J": 7.31, "AKR/J": 7.49, + "B6D2F1": 6.899, "BALB/cByJ": 7.172, + "BALB/cJ": 7.396}, + "1457784_at": {"129S1/SvImJ": 7.071, "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, "BALB/cByJ": 7.293, + "BALB/cJ": 7.117}, + "1444351_at": {"129S1/SvImJ": 7.221, "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, "BALB/cByJ": 6.752, + "BALB/cJ": 7.269} + + } + + expected_input = { + "trait_sample_data": trait_sample_data, + "TOMtype": "unsigned", + "minModuleSize": 30 + } + + with mock.patch("builtins.open", mock.mock_open()) as file_handler: + + file_name_generator.return_value = "facb73ff-7eef-4053-b6ea-e91d3a22a00c" + + results = dump_wgcna_data( + expected_input) + + file_handler.assert_called_once_with( + "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w') + + self.assertEqual( + results, "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json") diff --git a/tests/unit/db/data/genotypes/genotype_sample1.geno b/tests/unit/db/data/genotypes/genotype_sample1.geno new file mode 100644 index 0000000..2a55964 --- /dev/null +++ b/tests/unit/db/data/genotypes/genotype_sample1.geno @@ -0,0 +1,23 @@ +# File name: genotype_sample for testing + +# Metadata: Please retain this header information with file. + + +@name: BXD +@type: riset +@mat: B +@pat: D +@het:H +@unk: U + + + + + + +Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8 BXD9 +1 rs31443144 1.50 3.010274 B B D D D B +1 rs6269442 1.50 3.492195 B B D D H Y +1 rs32285189 1.63 3.511204 B U D D D B +2 rs31443144 1.50 3.010274 B B D D D B +2 rs6269442 1.50 3.492195 B B D D H Y
\ No newline at end of file diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py index 38de0e2..39f4af9 100644 --- a/tests/unit/db/test_datasets.py +++ b/tests/unit/db/test_datasets.py @@ -3,10 +3,10 @@ from unittest import mock, TestCase from gn3.db.datasets import ( retrieve_dataset_name, - retrieve_riset_fields, - retrieve_geno_riset_fields, - retrieve_publish_riset_fields, - retrieve_probeset_riset_fields) + retrieve_group_fields, + retrieve_geno_group_fields, + retrieve_publish_group_fields, + retrieve_probeset_group_fields) class TestDatasetsDBFunctions(TestCase): """Test cases for datasets functions.""" @@ -40,9 +40,9 @@ class TestDatasetsDBFunctions(TestCase): table=table, cols=columns), {"threshold": thresh, "name": dataset_name}) - def test_retrieve_probeset_riset_fields(self): + def test_retrieve_probeset_group_fields(self): """ - Test that the `riset` and `riset_id` fields are retrieved appropriately + Test that the `group` and `group_id` fields are retrieved appropriately for the 'ProbeSet' trait type. """ for trait_name, expected in [ @@ -52,7 +52,7 @@ class TestDatasetsDBFunctions(TestCase): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - retrieve_probeset_riset_fields(trait_name, db_mock), + retrieve_probeset_group_fields(trait_name, db_mock), expected) cursor.execute.assert_called_once_with( ( @@ -63,34 +63,34 @@ class TestDatasetsDBFunctions(TestCase): " AND ProbeSetFreeze.Name = %(name)s"), {"name": trait_name}) - def test_retrieve_riset_fields(self): + def test_retrieve_group_fields(self): """ - Test that the riset fields are set up correctly for the different trait + Test that the group fields are set up correctly for the different trait types. """ for trait_type, trait_name, dataset_info, expected in [ ["Publish", "pubTraitName01", {"dataset_name": "pubDBName01"}, - {"dataset_name": "pubDBName01", "riset": ""}], + {"dataset_name": "pubDBName01", "group": ""}], ["ProbeSet", "prbTraitName01", {"dataset_name": "prbDBName01"}, - {"dataset_name": "prbDBName01", "riset": ""}], + {"dataset_name": "prbDBName01", "group": ""}], ["Geno", "genoTraitName01", {"dataset_name": "genoDBName01"}, - {"dataset_name": "genoDBName01", "riset": ""}], - ["Temp", "tempTraitName01", {}, {"riset": ""}], + {"dataset_name": "genoDBName01", "group": ""}], + ["Temp", "tempTraitName01", {}, {"group": ""}], ]: db_mock = mock.MagicMock() with self.subTest( trait_type=trait_type, trait_name=trait_name, dataset_info=dataset_info): with db_mock.cursor() as cursor: - cursor.execute.return_value = ("riset_name", 0) + cursor.execute.return_value = ("group_name", 0) self.assertEqual( - retrieve_riset_fields( + retrieve_group_fields( trait_type, trait_name, dataset_info, db_mock), expected) - def test_retrieve_publish_riset_fields(self): + def test_retrieve_publish_group_fields(self): """ - Test that the `riset` and `riset_id` fields are retrieved appropriately + Test that the `group` and `group_id` fields are retrieved appropriately for the 'Publish' trait type. """ for trait_name, expected in [ @@ -100,7 +100,7 @@ class TestDatasetsDBFunctions(TestCase): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - retrieve_publish_riset_fields(trait_name, db_mock), + retrieve_publish_group_fields(trait_name, db_mock), expected) cursor.execute.assert_called_once_with( ( @@ -110,9 +110,9 @@ class TestDatasetsDBFunctions(TestCase): " AND PublishFreeze.Name = %(name)s"), {"name": trait_name}) - def test_retrieve_geno_riset_fields(self): + def test_retrieve_geno_group_fields(self): """ - Test that the `riset` and `riset_id` fields are retrieved appropriately + Test that the `group` and `group_id` fields are retrieved appropriately for the 'Geno' trait type. """ for trait_name, expected in [ @@ -122,7 +122,7 @@ class TestDatasetsDBFunctions(TestCase): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - retrieve_geno_riset_fields(trait_name, db_mock), + retrieve_geno_group_fields(trait_name, db_mock), expected) cursor.execute.assert_called_once_with( ( diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py new file mode 100644 index 0000000..c125224 --- /dev/null +++ b/tests/unit/db/test_genotypes.py @@ -0,0 +1,170 @@ +"""Tests gn3.db.genotypes""" +from unittest import TestCase +from gn3.db.genotypes import ( + parse_genotype_file, + parse_genotype_labels, + parse_genotype_header, + parse_genotype_marker, + build_genotype_chromosomes) + +class TestGenotypes(TestCase): + """Tests for functions in `gn3.db.genotypes`.""" + + def test_parse_genotype_labels(self): + """Test that the genotype labels are parsed correctly.""" + self.assertEqual( + parse_genotype_labels([ + "@name: test_group\t", "@filler: test_filler ", + "@type:test_type", "@mat:test_mat \t", "@pat:test_pat ", + "@het: test_het ", "@unk: test_unk", "@other: test_other", + "@brrr: test_brrr "]), + (("group", "test_group"), ("filler", "test_filler"), + ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"), + ("het", "test_het"), ("unk", "test_unk"))) + + def test_parse_genotype_header(self): + """Test that the genotype header is parsed correctly.""" + for header, expected in [ + [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t" + "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"), + (("Mbmap", True), ("cm_column", 2), ("mb_column", 3), + ("prgy", + ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", + "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", + "BXD19")), + ("nprgy", 14))], + [("Chr\tLocus\tcM\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\tBXD11" + "\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18"), + (("Mbmap", False), ("cm_column", 2), ("mb_column", None), + ("prgy", + ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", + "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18")), + ("nprgy", 13))]]: + with self.subTest(header=header): + self.assertEqual(parse_genotype_header(header), expected) + + def test_parse_genotype_data_line(self): + """Test parsing of data lines.""" + for line, geno_obj, parlist, expected in [ + ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB", + {"mat": "test_mat", "pat": "test_pat", "het": "test_het", + "unk": "test_unk", "cm_column": 2, "Mbmap": True, + "mb_column": 3}, + tuple(), + (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", + ("U", "U", "U", "U", "U", "U", "U", "U", "U", "U")))], + ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB", + {"mat": "test_mat", "pat": "test_pat", "het": "test_het", + "unk": "test_unk", "cm_column": 2, "Mbmap": True, + "mb_column": 3}, + ("some", "parlist", "content"), + (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", + (-1, 1, "U", "U", "U", "U", "U", "U", "U", "U")))], + ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tH\tD\tB\tU\tD\tB\tB", + {"mat": "B", "pat": "D", "het": "H", "unk": "U", + "cm_column": 2, "Mbmap": True, "mb_column": 3}, + tuple(), + (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]: + with self.subTest(line=line): + self.assertEqual( + parse_genotype_marker(line, geno_obj, parlist), + expected) + + def test_build_genotype_chromosomes(self): + """ + Given `markers` and `geno_obj`, test that `build_genotype_chromosomes` + builds a sequence of chromosomes with the given markers ordered + according to the `chr` value.""" + for markers, geno_obj, expected in [ + [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1))), + (("chr", "2"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))], + {"mat": "B", "pat": "D", "het": "H", "unk": "U", + "cm_column": 2, "Mbmap": True, "mb_column": 3}, + ((("name", "1"), ("mb_exists", True), ("cm_column", 2), + ("mb_column", 3), + ("loci", + ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": 3.0, + "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))), + (("name", "2"), ("mb_exists", True), ("cm_column", 2), + ("mb_column", 3), + ("loci", + ({"chr": "2", "name": "rs31443144", "cM": 2.0, "Mb": 3.0, + "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))))], + [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", None), + ("genotype", (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)))], + {"mat": "B", "pat": "D", "het": "H", "unk": "U", + "cm_column": 2, "Mbmap": False, "mb_column": None}, + ((("name", "1"), ("mb_exists", False), ("cm_column", 2), + ("mb_column", None), + ("loci", + ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None, + "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]: + with self.subTest(markers=markers): + self.assertEqual( + build_genotype_chromosomes(geno_obj, markers), + expected) + + def test_parse_genotype_file(self): + """Test the parsing of genotype files. """ + self.assertEqual( + parse_genotype_file( + "tests/unit/db/data/genotypes/genotype_sample1.geno"), + {"group": "BXD", + "type": "riset", + "mat": "B", + "pat": "D", + "het": "H", + "unk": "U", + "Mbmap": True, + "cm_column": 2, + "mb_column": 3, + "prgy": ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"), + "nprgy": 6, + "chromosomes": ( + {"name": "1", + "mb_exists": True, + "cm_column": 2, + "mb_column": 3, + "loci": ( + {"chr": "1", + "name": "rs31443144", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 1, -1) + }, + {"chr": "1", + "name": "rs6269442", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 0, "U")}, + {"chr": "1", + "name": "rs32285189", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, "U", 1, 1, 1, -1)})}, + {"name": "2", + "mb_exists": True, + "cm_column": 2, + "mb_column": 3, + "loci": ( + {"chr": "2", + "name": "rs31443144", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 1, -1)}, + {"chr": "2", + "name": "rs6269442", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 0, "U")})})}) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index ee98893..8af8e82 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -166,15 +166,19 @@ class TestTraitsDBFunctions(TestCase): the right calls. """ + # pylint: disable=C0103 db_mock = mock.MagicMock() STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" - PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " - "WHERE StrainId = %s AND Id = %s") - PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " - "WHERE StrainId = %s AND DataId = %s") - N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s " - "WHERE StrainId = %s AND DataId = %s") + PUBLISH_DATA_SQL: str = ( + "UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND Id = %s") + PUBLISH_SE_SQL: str = ( + "UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ( + "UPDATE NStrain SET count = %s " + "WHERE StrainId = %s AND DataId = %s") with db_mock.cursor() as cursor: type(cursor).rowcount = 1 diff --git a/tests/unit/sample_test_data.py b/tests/unit/sample_test_data.py new file mode 100644 index 0000000..407d074 --- /dev/null +++ b/tests/unit/sample_test_data.py @@ -0,0 +1,111 @@ +""" +This module holds a collection of sample data variables, used in more than one + test. + +This is mostly to avoid the `duplicate-code` pylint error that gets raised if +the same data is defined in more than one file. It has been found that adding +the `# pylint: disable=R0801` or `# pylint: disable=duplicate-code` to the top +of the file seems to not work as expected. + +Adding these same declarations to .pylintrc is not an option, since that, +seemingly, would deactivate the warnings for all code in the project: We do not +want that. +""" + +organised_trait_1 = { + "1": { + "ID": "1", + "chromosomes": { + 1: {"Chr": 1, + "loci": [ + { + "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }]}, + 2: {"Chr": 2, + "loci": [ + { + "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }]}}}} + +organised_trait_2 = { + "2": { + "ID": "2", + "chromosomes": { + 1: {"Chr": 1, + "loci": [ + { + "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }]}, + 2: {"Chr": 2, + "loci": [ + { + "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, + "LRS": 0.579, "Additive": -0.074, "pValue": 1.000 + }]}}}} diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/test_heatmaps.py index 650cb45..7b66688 100644 --- a/tests/unit/computations/test_heatmap.py +++ b/tests/unit/test_heatmaps.py @@ -1,38 +1,56 @@ -"""Module contains tests for gn3.computations.heatmap""" +"""Module contains tests for gn3.heatmaps.heatmaps""" from unittest import TestCase -from gn3.computations.heatmap import cluster_traits, export_trait_data +from gn3.heatmaps import ( + cluster_traits, + get_loci_names, + get_lrs_from_chr, + export_trait_data, + compute_traits_order, + retrieve_samples_and_values, + process_traits_data_for_heatmap) +from tests.unit.sample_test_data import organised_trait_1, organised_trait_2 -strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] +samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] trait_data = { "mysqlid": 36688172, "data": { - "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, - "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, - "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, - "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, - "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, - "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, - "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, - "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, - "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, - "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, - "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, - "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, - "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, - "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, - "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, - "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, - "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, - "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, - "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, - "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, - "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, - "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, - "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, - "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, - "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, - "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, - "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} + "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, + "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, + "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, + "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, + "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, + "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, + "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, + "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, + "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, + "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, + "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, + "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, + "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, + "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, + "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, + "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, + "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, + "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, + "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, + "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, + "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, + "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} + +slinked = ( + (((0, 2, 0.16381088984330505), + ((1, 7, 0.06024619831474998), 5, 0.19179284676938602), + 0.20337048635536847), + 9, + 0.23451785425383564), + ((3, (6, 8, 0.2140799896286565), 0.25879514152086425), + 4, 0.8968250491499363), + 0.9313185954797953) class TestHeatmap(TestCase): """Class for testing heatmap computation functions""" @@ -49,7 +67,7 @@ class TestHeatmap(TestCase): ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]: with self.subTest(dtype=dtype): self.assertEqual( - export_trait_data(trait_data, strainlist, dtype=dtype), + export_trait_data(trait_data, samplelist, dtype=dtype), expected) def test_export_trait_data_dtype_all_flags(self): @@ -89,7 +107,7 @@ class TestHeatmap(TestCase): with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): self.assertEqual( export_trait_data( - trait_data, strainlist, dtype=dtype, var_exists=vflag, + trait_data, samplelist, dtype=dtype, var_exists=vflag, n_exists=nflag), expected) @@ -141,3 +159,73 @@ class TestHeatmap(TestCase): 0.9313185954797953, 1.1683723389247052, 0.23451785425383564, 1.7413442197913358, 0.33370067057028485, 1.3256191648260216, 0.0))) + + def test_compute_heatmap_order(self): + """Test the orders.""" + self.assertEqual( + compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4)) + + def test_retrieve_samples_and_values(self): + """Test retrieval of samples and values.""" + for orders, slist, tdata, expected in [ + [ + [2], + ["s1", "s2", "s3", "s4"], + [[2, 9, 6, None, 4], + [7, 5, None, None, 4], + [9, None, 5, 4, 7], + [6, None, None, 4, None]], + [[2, ["s1", "s3", "s4"], [9, 5, 4]]] + ], + [ + [3], + ["s1", "s2", "s3", "s4", "s5"], + [[2, 9, 6, None, 4], + [7, 5, None, None, 4], + [9, None, 5, 4, 7], + [6, None, None, 4, None]], + [[3, ["s1", "s4"], [6, 4]]] + ]]: + with self.subTest(samplelist=slist, traitdata=tdata): + self.assertEqual( + retrieve_samples_and_values(orders, slist, tdata), expected) + + def test_get_lrs_from_chr(self): + """Check that function gets correct LRS values""" + for trait, chromosome, expected in [ + [{"chromosomes": {}}, 3, [None]], + [{"chromosomes": {3: {"loci": [ + {"Locus": "b", "LRS": 1.9}, + {"Locus": "a", "LRS": 13.2}, + {"Locus": "d", "LRS": 53.21}, + {"Locus": "c", "LRS": 2.22}]}}}, + 3, + [13.2, 1.9, 2.22, 53.21]]]: + with self.subTest(trait=trait, chromosome=chromosome): + self.assertEqual(get_lrs_from_chr(trait, chromosome), expected) + + def test_process_traits_data_for_heatmap(self): + """Check for correct processing of data for heatmap generation.""" + self.assertEqual( + process_traits_data_for_heatmap( + {**organised_trait_1, **organised_trait_2}, + ["2", "1"], + [1, 2]), + [[[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]], + [[0.5, 0.579, 0.5], + [0.5, 0.5, 0.5]]]) + + def test_get_loci_names(self): + """Check that loci names are retrieved correctly.""" + for organised, expected in ( + (organised_trait_1, + (("rs258367496", "rs30658298", "rs31443144", "rs32285189", + "rs32430919", "rs36251697", "rs6269442"), + ("rs31879829", "rs36742481", "rs51852623"))), + ({**organised_trait_1, **organised_trait_2}, + (("rs258367496", "rs30658298", "rs31443144", "rs32285189", + "rs32430919", "rs36251697", "rs6269442"), + ("rs31879829", "rs36742481", "rs51852623")))): + with self.subTest(organised=organised): + self.assertEqual(get_loci_names(organised, (1, 2)), expected) |