From ce243a57b24d6adecb169487e706290d91b22d19 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 20 Feb 2024 10:57:56 +0300 Subject: Track filename in the errors R/qtl2 bundles can contain more than one file, of the same type. When errors are encountered in any of the files, we need to be able to inform the user which file it is, in addition to the line and column number. --- tests/qc/test_cells.py | 20 ++++++++++---------- tests/qc/test_cells_average.py | 2 +- tests/qc/test_cells_standard_error.py | 2 +- tests/qc/test_error_collection.py | 9 ++++++--- tests/qc/test_header.py | 25 +++++++++++++++---------- tests/r_qtl/test_r_qtl2_qc.py | 19 ++++++++++++------- 6 files changed, 45 insertions(+), 32 deletions(-) (limited to 'tests') diff --git a/tests/qc/test_cells.py b/tests/qc/test_cells.py index e4a0959..937579f 100644 --- a/tests/qc/test_cells.py +++ b/tests/qc/test_cells.py @@ -22,12 +22,12 @@ def test_cell_value_errors_with_invalid_inputs2(num_str): `quality_control.errors.InvalidValue` object which holds the error information. """ - assert avg_invalid_value(0, 0, num_str) == InvalidValue( - 0, 0, num_str, ( + assert avg_invalid_value("test.file", 0, 0, num_str) == InvalidValue( + "test.file", 0, 0, num_str, ( f"Invalid value '{num_str}'. Expected string representing a number " "with exactly three decimal places.")) - assert se_invalid_value(0, 0, num_str) == InvalidValue( - 0, 0, num_str, ( + assert se_invalid_value("test.file", 0, 0, num_str) == InvalidValue( + "test.file", 0, 0, num_str, ( f"Invalid value '{num_str}'. Expected string representing a number " "with at least six decimal places.")) @@ -43,8 +43,8 @@ def test_cell_average_value_errors_if_not_three_decimal_places2(num_str): object with the information about the placement of the invalid value. """ line, col = randint(0, 100), randint(0, 20) - assert avg_invalid_value(line, col, num_str) == InvalidValue( - line, col, num_str, ( + assert avg_invalid_value("test.file", line, col, num_str) == InvalidValue( + "test.file", line, col, num_str, ( f"Invalid value '{num_str}'. Expected string representing a number " "with exactly three decimal places.")) @@ -57,7 +57,7 @@ def test_cell_average_value_pass_if_three_decimal_places(num_str): THEN: `avg_invalid_value` returns `None` """ line, col = randint(0, 100), randint(0, 20) - assert avg_invalid_value(line, col, num_str) is None + assert avg_invalid_value("test.file", line, col, num_str) is None @given(num_str=st.from_regex(r"^[0-9]+\.([0-9]{0,5}$)", fullmatch=True).filter( lambda param: not re.match(r"^[0-9]+\.?0*$", param))) @@ -70,8 +70,8 @@ def test_cell_standard_error_value_errors_if_less_than_six_decimal_places2(num_s object with the information about the placement of the invalid value. """ line, col = randint(0, 100), randint(0, 20) - assert se_invalid_value(line, col, num_str) == InvalidValue( - line, col, num_str, ( + assert se_invalid_value("test.file", line, col, num_str) == InvalidValue( + "test.file", line, col, num_str, ( f"Invalid value '{num_str}'. Expected string representing a number " "with at least six decimal places.")) @@ -84,4 +84,4 @@ def test_cell_standard_error_value_pass_if_six_or_more_decimal_places(num_str): THEN: `se_invalid_value` returns `None` """ line, col = randint(0, 100), randint(0, 20) - assert se_invalid_value(line, col, num_str) is None + assert se_invalid_value("test.file", line, col, num_str) is None diff --git a/tests/qc/test_cells_average.py b/tests/qc/test_cells_average.py index 68fd4ec..b6ded31 100644 --- a/tests/qc/test_cells_average.py +++ b/tests/qc/test_cells_average.py @@ -14,4 +14,4 @@ def test_cell_average_value_pass_if_no_decimal_places(num_str): THEN: `avg_invalid_value` returns `None` """ line, col = randint(0, 100), randint(0, 20) - assert avg_invalid_value(line, col, num_str) is None + assert avg_invalid_value("test.file", line, col, num_str) is None diff --git a/tests/qc/test_cells_standard_error.py b/tests/qc/test_cells_standard_error.py index 90c13cf..fa9f1db 100644 --- a/tests/qc/test_cells_standard_error.py +++ b/tests/qc/test_cells_standard_error.py @@ -17,4 +17,4 @@ def test_cell_standard_error_value_errors_if_less_than_six_decimal_places2(num_s THEN: `se_invalid_value` returns a `None`. """ line, col = randint(0, 100), randint(0, 20) - assert invalid_value(line, col, num_str) is None + assert invalid_value("test.file", line, col, num_str) is None diff --git a/tests/qc/test_error_collection.py b/tests/qc/test_error_collection.py index 962d2c5..260fabf 100644 --- a/tests/qc/test_error_collection.py +++ b/tests/qc/test_error_collection.py @@ -44,11 +44,14 @@ def test_collect_errors(filepath, filetype, strains, count): "filepath,filetype,expected", (("tests/test_data/average_inconsistent_columns.tsv", FileType.AVERAGE, (InconsistentColumns( - 4, 4, 5, "Header row has 4 columns while row 4 has 5 columns"), + "average_inconsistent_columns.tsv", 4, 4, 5, + "Header row has 4 columns while row 4 has 5 columns"), InconsistentColumns( - 5, 4, 3, "Header row has 4 columns while row 5 has 3 columns"), + "average_inconsistent_columns.tsv", 5, 4, 3, + "Header row has 4 columns while row 5 has 3 columns"), InconsistentColumns( - 6, 4, 7, "Header row has 4 columns while row 6 has 7 columns"))),)) + "average_inconsistent_columns.tsv", 6, 4, 7, + "Header row has 4 columns while row 6 has 7 columns"))),)) def test_collect_inconsistent_column_errors(filepath, filetype, strains, expected): """ Given: A file with inconsistent columns in certain lines diff --git a/tests/qc/test_header.py b/tests/qc/test_header.py index 5e54122..06647a2 100644 --- a/tests/qc/test_header.py +++ b/tests/qc/test_header.py @@ -11,17 +11,22 @@ from quality_control.headers import ( @given(headers=st.lists(st.text(max_size=10), max_size=1)) def test_invalid_header_with_list_of_one_value(headers): """Test `invalid_header` with invalid header row""" - assert invalid_header(0, headers) == InvalidValue( - 0, 0, "".join(headers), + assert invalid_header("test.file", 0, headers) == InvalidValue( + "test.file", 0, 0, "".join(headers), "The header MUST contain at least 2 columns") @pytest.mark.unit_test @given(headings=st.lists(st.text(min_size=2, max_size=10), min_size=2)) def test_invalid_headings_with_invalid_inputs(headings): "Verify that the check for header validity works" - assert invalid_headings(0, ("BXD1", "BXD2", "BXD3"), headings) == tuple( - InvalidValue(0, col, heading, f"'{heading}' not a valid strain.") - for col, heading in enumerate(headings, start=2)) + assert invalid_headings( + "test.file", 0, ("BXD1", "BXD2", "BXD3"), headings) == tuple( + InvalidValue("test.file", + 0, + col, + heading, + f"'{heading}' not a valid strain.") + for col, heading in enumerate(headings, start=2)) @pytest.mark.unit_test @pytest.mark.parametrize( @@ -30,7 +35,7 @@ def test_invalid_headings_with_invalid_inputs(headings): (("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))]) def test_invalid_header_with_valid_headers(headers): "Verify that the check for header validity works" - assert invalid_header(0, headers) is None + assert invalid_header("test.file", 0, headers) is None @pytest.mark.unit_test @pytest.mark.parametrize( @@ -40,7 +45,7 @@ def test_invalid_header_with_valid_headers(headers): ("AStrain", "AnotherStrain", "YetAnotherStrain"))]) def test_invalid_headings_with_valid_headings(strains, headings): "Verify that the check for header validity works" - assert invalid_headings(0, strains, headings) == tuple() + assert invalid_headings("test.file", 0, strains, headings) == tuple() @pytest.mark.unit_test @pytest.mark.parametrize( @@ -50,8 +55,8 @@ def test_invalid_headings_with_valid_headings(strains, headings): "AStrain"), {"AStrain": (2, 5)})]) def test_duplicate_headers_with_repeated_column_headings(headers, repeated): """Check that parsing fails if any header is duplicated""" - assert duplicate_headings(0, headers) == tuple( - DuplicateHeading(0, cols, head, ( + assert duplicate_headings("test.file", 0, headers) == tuple( + DuplicateHeading("test.file", 0, cols, head, ( f"Heading '{head}', is repeated in columns " f"{','.join(str(i) for i in cols)}")) for head, cols in repeated.items()) @@ -63,4 +68,4 @@ def test_duplicate_headers_with_repeated_column_headings(headers, repeated): (("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain",))]) def test_duplicate_headers_with_unique_column_headings(headers): """Check that parsing fails if any header is duplicated""" - assert duplicate_headings(0, headers) == tuple() + assert duplicate_headings("test.file", 0, headers) == tuple() diff --git a/tests/r_qtl/test_r_qtl2_qc.py b/tests/r_qtl/test_r_qtl2_qc.py index d12172e..05db30e 100644 --- a/tests/r_qtl/test_r_qtl2_qc.py +++ b/tests/r_qtl/test_r_qtl2_qc.py @@ -124,9 +124,12 @@ def test_missing_files(filepath, expected): ("tests/r_qtl/test_files/test_geno.zip", tuple()), ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip", - (InvalidValue(1, "AXR-1", "X", "Invalid value 'X'. Expected one of ('L', 'C')."), - InvalidValue(2, "EC.480C", "Y", "Invalid value 'Y'. Expected one of ('L', 'C')."), - InvalidValue(6, "HH.335C-Col/PhyA", "H", f"Invalid value 'H'. Expected one of ('L', 'C')."))))) + (InvalidValue("geno", 1, "AXR-1", "X", + "Invalid value 'X'. Expected one of ('L', 'C')."), + InvalidValue("geno", 2, "EC.480C", + "Y", "Invalid value 'Y'. Expected one of ('L', 'C')."), + InvalidValue("geno", 6, "HH.335C-Col/PhyA", "H", + f"Invalid value 'H'. Expected one of ('L', 'C')."))))) def test_geno_errors(filepath, expected): """ GIVEN: A R/qtl2 bundle @@ -145,10 +148,12 @@ def test_geno_errors(filepath, expected): ("tests/r_qtl/test_files/pheno_without_errors.zip", tuple()), ("tests/r_qtl/test_files/pheno_with_errors.zip", - (InvalidValue(1, "liver", "61.92", ("Invalid value '61.92'. Expected numerical value " - "with at least 3 decimal places.")), - InvalidValue(2, "spleen", "brrr", ("Invalid value 'brrr'. Expected numerical value " - "with at least 3 decimal places.")))))) + (InvalidValue("pheno", 1, "liver", "61.92", ( + "Invalid value '61.92'. Expected numerical value " + "with at least 3 decimal places.")), + InvalidValue("pheno", 2, "spleen", "brrr", ( + "Invalid value 'brrr'. Expected numerical value " + "with at least 3 decimal places.")))))) def test_pheno_errors(filepath, expected): """ GIVEN: A R/qtl2 bundle -- cgit v1.2.3