aboutsummaryrefslogtreecommitdiff
path: root/tests/qc
diff options
context:
space:
mode:
Diffstat (limited to 'tests/qc')
-rw-r--r--tests/qc/test_cells.py56
-rw-r--r--tests/qc/test_error_collection.py42
-rw-r--r--tests/qc/test_header.py45
-rw-r--r--tests/qc/test_parsing.py53
4 files changed, 19 insertions, 177 deletions
diff --git a/tests/qc/test_cells.py b/tests/qc/test_cells.py
index 46aeb64..a38be30 100644
--- a/tests/qc/test_cells.py
+++ b/tests/qc/test_cells.py
@@ -1,64 +1,12 @@
"""Test that values in cells within a line fulfill the required criteria"""
-import pytest
from random import randint
from hypothesis import given
from hypothesis import strategies as st
from quality_control.errors import InvalidValue
-from quality_control.errors import InvalidCellValue
-from quality_control.average import (
- valid_value as avg_valid_value,
- invalid_value as avg_invalid_value)
-from quality_control.standard_error import (
- valid_value as se_valid_value,
- invalid_value as se_invalid_value)
-
-@given(num_str=st.from_regex(
- r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
-def test_cell_value_errors_with_invalid_inputs(num_str):
- """Check that an error is raised for a cell with an invalid value."""
- with pytest.raises(InvalidCellValue):
- avg_valid_value(num_str)
- with pytest.raises(InvalidCellValue):
- se_valid_value(num_str)
-
-@given(num_str=st.from_regex(
- r"^[0-9]+\.([0-9]{1,2}|[0-9]{4,}$)", fullmatch=True))
-def test_cell_average_value_errors_if_not_three_decimal_places(num_str):
- """Check that an error is raised if the average value does not have 3 decimal places"""
- with pytest.raises(InvalidCellValue):
- avg_valid_value(num_str)
-
-@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{3}$", fullmatch=True))
-def test_cell_average_value_pass_if_three_decimal_places(num_str):
- """Check that there is no error if the average value has 3 decimal places."""
- processed = avg_valid_value(num_str)
- assert (
- isinstance(processed, float) and
- processed == float(num_str))
-
-@given(num_str=st.from_regex(r"^[0-9]+\.([0-9]{0,5}$)", fullmatch=True))
-def test_cell_standard_error_value_errors_if_less_than_six_decimal_places(num_str):
- """
- Check that an error is raised if the standard error value does not have 6
- decimal places
- """
- with pytest.raises(InvalidCellValue):
- se_valid_value(num_str)
-
-@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{6,}$", fullmatch=True))
-def test_cell_standard_error_value_pass_if_six_or_more_decimal_places(num_str):
- """
- Check that there is no error if the standard error value has 3 decimal
- places.
- """
- processed = se_valid_value(num_str)
- assert (
- isinstance(processed, float) and
- processed == float(num_str))
-
-## ================================================================================
+from quality_control.average import invalid_value as avg_invalid_value
+from quality_control.standard_error import invalid_value as se_invalid_value
@given(num_str=st.from_regex(
r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
diff --git a/tests/qc/test_error_collection.py b/tests/qc/test_error_collection.py
index 466f455..fe85bb1 100644
--- a/tests/qc/test_error_collection.py
+++ b/tests/qc/test_error_collection.py
@@ -1,33 +1,9 @@
+"""Check that error collection works as expected"""
+
import pytest
-from quality_control.parsing import take, FileType, parse_errors
-from quality_control.parsing import collect_errors
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
- "filepath,filetype,seek_pos",
- (("tests/test_data/average_crlf.tsv", FileType.AVERAGE, 0),
- ("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE,
- 205500004 # Skip first 500K lines
- ),
- ("tests/test_data/average.tsv", FileType.AVERAGE, 0),
- ("tests/test_data/standarderror_1_error_at_end.tsv",
- FileType.STANDARD_ERROR, 0),
- ("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR, 0),
- ("tests/test_data/duplicated_headers_no_data_errors.tsv",
- FileType.AVERAGE, 0)))
-def test_parse_errors(filepath, filetype, strains, seek_pos):
- """
- Check that only errors are returned, and that certain properties hold for
- said errors.
- """
- for error in parse_errors(filepath, filetype, strains, seek_pos):
- assert isinstance(error, dict)
- assert "filepath" in error
- assert "filetype" in error
- assert "position" in error
- assert "error" in error and isinstance(error["error"], str)
- assert "message" in error
+from quality_control.errors import InvalidValue, DuplicateHeading
+from quality_control.parsing import take, FileType, collect_errors
@pytest.mark.parametrize(
"sample,num,expected",
@@ -35,13 +11,11 @@ def test_parse_errors(filepath, filetype, strains, seek_pos):
([0, 1, 2, 3], 200, [0, 1, 2, 3]),
(("he", "is", "a", "lovely", "boy"), 3, ["he", "is", "a"])))
def test_take(sample, num, expected):
+ """Check that `take` works correctly."""
taken = take(sample, num)
assert len(taken) <= num
assert taken == expected
-
-## ==================================================
-
@pytest.mark.slow
@pytest.mark.parametrize(
"filepath,filetype,count",
@@ -55,4 +29,8 @@ def test_take(sample, num, expected):
("tests/test_data/duplicated_headers_no_data_errors.tsv",
FileType.AVERAGE, 10)))
def test_collect_errors(filepath, filetype, strains, count):
- assert len(collect_errors(filepath, filetype, strains, count)) <= count
+ """Check that `collect_errors` works as expected."""
+ results = take(collect_errors(filepath, filetype, strains), count)
+ def __valid_instance(item):
+ return isinstance(item, (InvalidValue, DuplicateHeading))
+ assert all(__valid_instance(error) for error in results)
diff --git a/tests/qc/test_header.py b/tests/qc/test_header.py
index f860a71..2557e85 100644
--- a/tests/qc/test_header.py
+++ b/tests/qc/test_header.py
@@ -3,42 +3,13 @@ import pytest
from hypothesis import given
from hypothesis import strategies as st
-from quality_control.headers import valid_header
from quality_control.errors import InvalidValue, DuplicateHeading
-from quality_control.errors import DuplicateHeader, InvalidHeaderValue
from quality_control.headers import (
invalid_header, invalid_headings, duplicate_headings)
-@given(headers=st.lists(st.text(max_size=10)))
-def test_valid_header_errors_with_invalid_headers(headers):
- "Verify that the check for header validity works"
- with pytest.raises(InvalidHeaderValue):
- valid_header(("BXD1", "BXD2", "BXD3"), headers)
-
-@pytest.mark.parametrize(
- "strains,headers", [
- (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1")),
- (("AStrain", "AnotherStrain", "YetAnotherStrain"),
- ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
-def test_valid_header_strains_passes_with_valid_headers(strains, headers):
- "Verify that the check for header validity works"
- assert valid_header(strains, headers)
-
-@pytest.mark.parametrize(
- "strains,headers", [
- (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1", "BXD1")),
- (("AStrain", "AnotherStrain", "YetAnotherStrain"),
- ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain",
- "AStrain"))])
-def test_valid_header_fails_with_duplicate_headers(strains, headers):
- """Check that parsing fails if any header is duplicated"""
- with pytest.raises(DuplicateHeader):
- valid_header(strains, headers)
-
-## ============================================================
-
@given(headers=st.lists(st.text(max_size=10), max_size=1))
def test_invalid_header_with_list_of_one_value(headers):
+ """Test `invalid_header` with invalid header row"""
assert invalid_header(0, headers) == InvalidValue(
0, 0, "<TAB>".join(headers),
"The header MUST contain at least 2 columns")
@@ -51,13 +22,12 @@ def test_invalid_headings_with_invalid_inputs(headings):
for col, heading in enumerate(headings, start=2))
@pytest.mark.parametrize(
- "strains,headers", [
- (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1")),
- (("AStrain", "AnotherStrain", "YetAnotherStrain"),
- ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
-def test_invalid_header_with_valid_headers(strains, headers):
+ "headers", [
+ (("ProbeSet", "BXD3", "BXD1")),
+ (("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
+def test_invalid_header_with_valid_headers(headers):
"Verify that the check for header validity works"
- assert invalid_header(0, headers) == None
+ assert invalid_header(0, headers) is None
@pytest.mark.parametrize(
"strains,headings", [
@@ -76,7 +46,7 @@ def test_invalid_headings_with_valid_headings(strains, headings):
def test_duplicate_headers_with_repeated_column_headings(headers, repeated):
"""Check that parsing fails if any header is duplicated"""
assert duplicate_headings(0, headers) == tuple(
- DuplicateHeading(0, head, cols, (
+ DuplicateHeading(0, cols, head, (
f"Heading '{head}', is repeated in columns "
f"{','.join(str(i) for i in cols)}"))
for head, cols in repeated.items())
@@ -88,4 +58,3 @@ def test_duplicate_headers_with_repeated_column_headings(headers, repeated):
def test_duplicate_headers_with_unique_column_headings(headers):
"""Check that parsing fails if any header is duplicated"""
assert duplicate_headings(0, headers) == tuple()
-
diff --git a/tests/qc/test_parsing.py b/tests/qc/test_parsing.py
deleted file mode 100644
index 41739ad..0000000
--- a/tests/qc/test_parsing.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Test the parsing of the files"""
-import pytest
-
-from quality_control.errors import ParseError
-from quality_control.parsing import FileType, parse_file
-
-@pytest.mark.parametrize(
- "filepath,filetype",
- (("tests/test_data/average_crlf.tsv", FileType.STANDARD_ERROR),
- ("tests/test_data/average_error_at_end_200MB.tsv",
- FileType.STANDARD_ERROR),
- ("tests/test_data/average.tsv", FileType.STANDARD_ERROR),
- ("tests/test_data/standarderror_1_error_at_end.tsv", FileType.AVERAGE),
- ("tests/test_data/standarderror.tsv", FileType.AVERAGE),
- ("tests/test_data/duplicated_headers_no_data_errors.tsv",
- FileType.STANDARD_ERROR),))
-def test_parse_file_fails_with_wrong_filetype_declaration(filepath, filetype, strains):
- """Check that parsing fails if the wrong file type is declared"""
- with pytest.raises(ParseError):
- for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
- pass
-
-@pytest.mark.parametrize(
- "filepath,filetype",
- (("tests/test_data/no_data_errors.tsv", FileType.AVERAGE),))
-def test_parse_file_passes_with_valid_files(filepath, filetype, strains):
- """Check that parsing succeeds with valid files"""
- for line in parse_file(filepath, filetype, strains):
- assert bool(line)
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
- "filepath,filetype",
- (("tests/test_data/average_large_no_errors.tsv", FileType.AVERAGE),
- # ("tests/test_data/average_no_errors.tsv", FileType.AVERAGE),
- # ("tests/test_data/standarderror_no_errors.tsv", FileType.STANDARD_ERROR),
- ))
-def test_parse_file_works_with_large_files(filepath, filetype, strains):
- """Check that parsing succeeds even with large files."""
- for line in parse_file(filepath, filetype, strains):
- assert bool(line)
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
- "filepath,filetype",
- (("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE),
- ("tests/test_data/standarderror_1_error_at_end.tsv", FileType.STANDARD_ERROR),
- ("tests/test_data/duplicated_headers_no_data_errors.tsv", FileType.AVERAGE)))
-def test_parse_file_raises_exception_on_error_in_file(filepath, filetype, strains):
- "Check that parsing fails if any error is found in a file"
- with pytest.raises(ParseError):
- for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
- pass