Diffstat (limited to 'tests/qc')
-rw-r--r-- | tests/qc/test_cells.py            | 56
-rw-r--r-- | tests/qc/test_error_collection.py | 42
-rw-r--r-- | tests/qc/test_header.py           | 45
-rw-r--r-- | tests/qc/test_parsing.py          | 53
4 files changed, 19 insertions, 177 deletions
diff --git a/tests/qc/test_cells.py b/tests/qc/test_cells.py
index 46aeb64..a38be30 100644
--- a/tests/qc/test_cells.py
+++ b/tests/qc/test_cells.py
@@ -1,64 +1,12 @@
 """Test that values in cells within a line fulfill the required criteria"""
-import pytest
 from random import randint
 
 from hypothesis import given
 from hypothesis import strategies as st
 
 from quality_control.errors import InvalidValue
-from quality_control.errors import InvalidCellValue
-from quality_control.average import (
-    valid_value as avg_valid_value,
-    invalid_value as avg_invalid_value)
-from quality_control.standard_error import (
-    valid_value as se_valid_value,
-    invalid_value as se_invalid_value)
-
-@given(num_str=st.from_regex(
-    r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
-def test_cell_value_errors_with_invalid_inputs(num_str):
-    """Check that an error is raised for a cell with an invalid value."""
-    with pytest.raises(InvalidCellValue):
-        avg_valid_value(num_str)
-    with pytest.raises(InvalidCellValue):
-        se_valid_value(num_str)
-
-@given(num_str=st.from_regex(
-    r"^[0-9]+\.([0-9]{1,2}|[0-9]{4,}$)", fullmatch=True))
-def test_cell_average_value_errors_if_not_three_decimal_places(num_str):
-    """Check that an error is raised if the average value does not have 3 decimal places"""
-    with pytest.raises(InvalidCellValue):
-        avg_valid_value(num_str)
-
-@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{3}$", fullmatch=True))
-def test_cell_average_value_pass_if_three_decimal_places(num_str):
-    """Check that there is no error if the average value has 3 decimal places."""
-    processed = avg_valid_value(num_str)
-    assert (
-        isinstance(processed, float) and
-        processed == float(num_str))
-
-@given(num_str=st.from_regex(r"^[0-9]+\.([0-9]{0,5}$)", fullmatch=True))
-def test_cell_standard_error_value_errors_if_less_than_six_decimal_places(num_str):
-    """
-    Check that an error is raised if the standard error value does not have 6
-    decimal places
-    """
-    with pytest.raises(InvalidCellValue):
-        se_valid_value(num_str)
-
-@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{6,}$", fullmatch=True))
-def test_cell_standard_error_value_pass_if_six_or_more_decimal_places(num_str):
-    """
-    Check that there is no error if the standard error value has 3 decimal
-    places.
-    """
-    processed = se_valid_value(num_str)
-    assert (
-        isinstance(processed, float) and
-        processed == float(num_str))
-
-## ================================================================================
+from quality_control.average import invalid_value as avg_invalid_value
+from quality_control.standard_error import invalid_value as se_invalid_value
 
 @given(num_str=st.from_regex(
     r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
diff --git a/tests/qc/test_error_collection.py b/tests/qc/test_error_collection.py
index 466f455..fe85bb1 100644
--- a/tests/qc/test_error_collection.py
+++ b/tests/qc/test_error_collection.py
@@ -1,33 +1,9 @@
+"""Check that error collection works as expected"""
+
 import pytest
 
-from quality_control.parsing import take, FileType, parse_errors
-from quality_control.parsing import collect_errors
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "filepath,filetype,seek_pos",
-    (("tests/test_data/average_crlf.tsv", FileType.AVERAGE, 0),
-     ("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE,
-      205500004 # Skip first 500K lines
-      ),
-     ("tests/test_data/average.tsv", FileType.AVERAGE, 0),
-     ("tests/test_data/standarderror_1_error_at_end.tsv",
-      FileType.STANDARD_ERROR, 0),
-     ("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR, 0),
-     ("tests/test_data/duplicated_headers_no_data_errors.tsv",
-      FileType.AVERAGE, 0)))
-def test_parse_errors(filepath, filetype, strains, seek_pos):
-    """
-    Check that only errors are returned, and that certain properties hold for
-    said errors.
-    """
-    for error in parse_errors(filepath, filetype, strains, seek_pos):
-        assert isinstance(error, dict)
-        assert "filepath" in error
-        assert "filetype" in error
-        assert "position" in error
-        assert "error" in error and isinstance(error["error"], str)
-        assert "message" in error
+from quality_control.errors import InvalidValue, DuplicateHeading
+from quality_control.parsing import take, FileType, collect_errors
 
 @pytest.mark.parametrize(
     "sample,num,expected",
@@ -35,13 +11,11 @@ def test_parse_errors(filepath, filetype, strains, seek_pos):
     ([0, 1, 2, 3], 200, [0, 1, 2, 3]),
     (("he", "is", "a", "lovely", "boy"), 3, ["he", "is", "a"])))
 def test_take(sample, num, expected):
+    """Check that `take` works correctly."""
     taken = take(sample, num)
     assert len(taken) <= num
     assert taken == expected
-
-## ==================================================
-
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "filepath,filetype,count",
     (("tests/test_data/average_crlf.tsv", FileType.AVERAGE, 10),
@@ -55,4 +29,8 @@ def test_take(sample, num, expected):
      ("tests/test_data/duplicated_headers_no_data_errors.tsv",
       FileType.AVERAGE, 10)))
 def test_collect_errors(filepath, filetype, strains, count):
-    assert len(collect_errors(filepath, filetype, strains, count)) <= count
+    """Check that `collect_errors` works as expected."""
+    results = take(collect_errors(filepath, filetype, strains), count)
+    def __valid_instance(item):
+        return isinstance(item, (InvalidValue, DuplicateHeading))
+    assert all(__valid_instance(error) for error in results)
diff --git a/tests/qc/test_header.py b/tests/qc/test_header.py
index f860a71..2557e85 100644
--- a/tests/qc/test_header.py
+++ b/tests/qc/test_header.py
@@ -3,42 +3,13 @@ import pytest
 from hypothesis import given
 from hypothesis import strategies as st
 
-from quality_control.headers import valid_header
 from quality_control.errors import InvalidValue, DuplicateHeading
-from quality_control.errors import DuplicateHeader, InvalidHeaderValue
 from quality_control.headers import (
     invalid_header, invalid_headings,
     duplicate_headings)
 
-@given(headers=st.lists(st.text(max_size=10)))
-def test_valid_header_errors_with_invalid_headers(headers):
-    "Verify that the check for header validity works"
-    with pytest.raises(InvalidHeaderValue):
-        valid_header(("BXD1", "BXD2", "BXD3"), headers)
-
-@pytest.mark.parametrize(
-    "strains,headers", [
-        (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1")),
-        (("AStrain", "AnotherStrain", "YetAnotherStrain"),
-         ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
-def test_valid_header_strains_passes_with_valid_headers(strains, headers):
-    "Verify that the check for header validity works"
-    assert valid_header(strains, headers)
-
-@pytest.mark.parametrize(
-    "strains,headers", [
-        (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1", "BXD1")),
-        (("AStrain", "AnotherStrain", "YetAnotherStrain"),
-         ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain",
-          "AStrain"))])
-def test_valid_header_fails_with_duplicate_headers(strains, headers):
-    """Check that parsing fails if any header is duplicated"""
-    with pytest.raises(DuplicateHeader):
-        valid_header(strains, headers)
-
-## ============================================================
-
 @given(headers=st.lists(st.text(max_size=10), max_size=1))
 def test_invalid_header_with_list_of_one_value(headers):
+    """Test `invalid_header` with invalid header row"""
     assert invalid_header(0, headers) == InvalidValue(
         0, 0, "<TAB>".join(headers),
         "The header MUST contain at least 2 columns")
@@ -51,13 +22,12 @@ def test_invalid_headings_with_invalid_inputs(headings):
         for col, heading in enumerate(headings, start=2))
 
 @pytest.mark.parametrize(
-    "strains,headers", [
-        (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1")),
-        (("AStrain", "AnotherStrain", "YetAnotherStrain"),
-         ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
-def test_invalid_header_with_valid_headers(strains, headers):
+    "headers", [
+        (("ProbeSet", "BXD3", "BXD1")),
+        (("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
+def test_invalid_header_with_valid_headers(headers):
     "Verify that the check for header validity works"
-    assert invalid_header(0, headers) == None
+    assert invalid_header(0, headers) is None
 
 @pytest.mark.parametrize(
     "strains,headings", [
@@ -76,7 +46,7 @@ def test_invalid_headings_with_valid_headings(strains, headings):
 def test_duplicate_headers_with_repeated_column_headings(headers, repeated):
     """Check that parsing fails if any header is duplicated"""
     assert duplicate_headings(0, headers) == tuple(
-        DuplicateHeading(0, head, cols, (
+        DuplicateHeading(0, cols, head, (
             f"Heading '{head}', is repeated in columns "
             f"{','.join(str(i) for i in cols)}"))
         for head, cols in repeated.items())
@@ -88,4 +58,3 @@
 def test_duplicate_headers_with_unique_column_headings(headers):
     """Check that parsing fails if any header is duplicated"""
     assert duplicate_headings(0, headers) == tuple()
-
diff --git a/tests/qc/test_parsing.py b/tests/qc/test_parsing.py
deleted file mode 100644
index 41739ad..0000000
--- a/tests/qc/test_parsing.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Test the parsing of the files"""
-import pytest
-
-from quality_control.errors import ParseError
-from quality_control.parsing import FileType, parse_file
-
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/average_crlf.tsv", FileType.STANDARD_ERROR),
-     ("tests/test_data/average_error_at_end_200MB.tsv",
-      FileType.STANDARD_ERROR),
-     ("tests/test_data/average.tsv", FileType.STANDARD_ERROR),
-     ("tests/test_data/standarderror_1_error_at_end.tsv", FileType.AVERAGE),
-     ("tests/test_data/standarderror.tsv", FileType.AVERAGE),
-     ("tests/test_data/duplicated_headers_no_data_errors.tsv",
-      FileType.STANDARD_ERROR),))
-def test_parse_file_fails_with_wrong_filetype_declaration(filepath, filetype, strains):
-    """Check that parsing fails if the wrong file type is declared"""
-    with pytest.raises(ParseError):
-        for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
-            pass
-
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/no_data_errors.tsv", FileType.AVERAGE),))
-def test_parse_file_passes_with_valid_files(filepath, filetype, strains):
-    """Check that parsing succeeds with valid files"""
-    for line in parse_file(filepath, filetype, strains):
-        assert bool(line)
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/average_large_no_errors.tsv", FileType.AVERAGE),
-     # ("tests/test_data/average_no_errors.tsv", FileType.AVERAGE),
-     # ("tests/test_data/standarderror_no_errors.tsv", FileType.STANDARD_ERROR),
-     ))
-def test_parse_file_works_with_large_files(filepath, filetype, strains):
-    """Check that parsing succeeds even with large files."""
-    for line in parse_file(filepath, filetype, strains):
-        assert bool(line)
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE),
-     ("tests/test_data/standarderror_1_error_at_end.tsv", FileType.STANDARD_ERROR),
-     ("tests/test_data/duplicated_headers_no_data_errors.tsv", FileType.AVERAGE)))
-def test_parse_file_raises_exception_on_error_in_file(filepath, filetype, strains):
-    "Check that parsing fails if any error is found in a file"
-    with pytest.raises(ParseError):
-        for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
-            pass