4 files changed, 19 insertions, 177 deletions
diff --git a/tests/qc/test_cells.py b/tests/qc/test_cells.py
index 46aeb64..a38be30 100644
--- a/tests/qc/test_cells.py
+++ b/tests/qc/test_cells.py
@@ -1,64 +1,12 @@
 """Test that values in cells within a line fulfill the required criteria"""
 
-import pytest
 from random import randint
 from hypothesis import given
 from hypothesis import strategies as st
 
 from quality_control.errors import InvalidValue
-from quality_control.errors import InvalidCellValue
-from quality_control.average import (
-    valid_value as avg_valid_value,
-    invalid_value as avg_invalid_value)
-from quality_control.standard_error import (
-    valid_value as se_valid_value,
-    invalid_value as se_invalid_value)
-
-@given(num_str=st.from_regex(
-    r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
-def test_cell_value_errors_with_invalid_inputs(num_str):
-    """Check that an error is raised for a cell with an invalid value."""
-    with pytest.raises(InvalidCellValue):
-        avg_valid_value(num_str)
-    with pytest.raises(InvalidCellValue):
-        se_valid_value(num_str)
-
-@given(num_str=st.from_regex(
-    r"^[0-9]+\.([0-9]{1,2}|[0-9]{4,}$)", fullmatch=True))
-def test_cell_average_value_errors_if_not_three_decimal_places(num_str):
-    """Check that an error is raised if the average value does not have 3 decimal places"""
-    with pytest.raises(InvalidCellValue):
-        avg_valid_value(num_str)
-
-@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{3}$", fullmatch=True))
-def test_cell_average_value_pass_if_three_decimal_places(num_str):
-    """Check that there is no error if the average value has 3 decimal places."""
-    processed = avg_valid_value(num_str)
-    assert (
-        isinstance(processed, float) and
-        processed == float(num_str))
-
-@given(num_str=st.from_regex(r"^[0-9]+\.([0-9]{0,5}$)", fullmatch=True))
-def test_cell_standard_error_value_errors_if_less_than_six_decimal_places(num_str):
-    """
-    Check that an error is raised if the standard error value does not have 6
-    decimal places
-    """
-    with pytest.raises(InvalidCellValue):
-        se_valid_value(num_str)
-
-@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{6,}$", fullmatch=True))
-def test_cell_standard_error_value_pass_if_six_or_more_decimal_places(num_str):
-    """
-    Check that there is no error if the standard error value has 3 decimal
-    places.
-    """
-    processed = se_valid_value(num_str)
-    assert (
-        isinstance(processed, float) and
-        processed == float(num_str))
-
-## ================================================================================
+from quality_control.average import invalid_value as avg_invalid_value
+from quality_control.standard_error import invalid_value as se_invalid_value
 
 @given(num_str=st.from_regex(
     r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
diff --git a/tests/qc/test_error_collection.py b/tests/qc/test_error_collection.py
index 466f455..fe85bb1 100644
--- a/tests/qc/test_error_collection.py
+++ b/tests/qc/test_error_collection.py
@@ -1,33 +1,9 @@
+"""Check that error collection works as expected"""
+
 import pytest
 
-from quality_control.parsing import take, FileType, parse_errors
-from quality_control.parsing import collect_errors
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "filepath,filetype,seek_pos",
-    (("tests/test_data/average_crlf.tsv", FileType.AVERAGE, 0),
-     ("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE,
-      205500004 # Skip first 500K lines
-      ),
-     ("tests/test_data/average.tsv", FileType.AVERAGE, 0),
-     ("tests/test_data/standarderror_1_error_at_end.tsv",
-      FileType.STANDARD_ERROR, 0),
-     ("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR, 0),
-     ("tests/test_data/duplicated_headers_no_data_errors.tsv",
-      FileType.AVERAGE, 0)))
-def test_parse_errors(filepath, filetype, strains, seek_pos):
-    """
-    Check that only errors are returned, and that certain properties hold for
-    said errors.
-    """
-    for error in parse_errors(filepath, filetype, strains, seek_pos):
-        assert isinstance(error, dict)
-        assert "filepath" in error
-        assert "filetype" in error
-        assert "position" in error
-        assert "error" in error and isinstance(error["error"], str)
-        assert "message" in error
+from quality_control.errors import InvalidValue, DuplicateHeading
+from quality_control.parsing import take, FileType, collect_errors
 
 @pytest.mark.parametrize(
     "sample,num,expected",
@@ -35,13 +11,11 @@ def test_parse_errors(filepath, filetype, strains, seek_pos):
      ([0, 1, 2, 3], 200, [0, 1, 2, 3]),
      (("he", "is", "a", "lovely", "boy"), 3, ["he", "is", "a"])))
 def test_take(sample, num, expected):
+    """Check that `take` returns at most the first `num` items of `sample`."""
     taken = take(sample, num)
     assert len(taken) <= num
     assert taken == expected
 
-
-## ==================================================
-
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "filepath,filetype,count",
@@ -55,4 +29,8 @@ def test_take(sample, num, expected):
      ("tests/test_data/duplicated_headers_no_data_errors.tsv",
       FileType.AVERAGE, 10)))
 def test_collect_errors(filepath, filetype, strains, count):
-    assert len(collect_errors(filepath, filetype, strains, count)) <= count
+    """Check that the first `count` results of `collect_errors` are known error types."""
+    results = take(collect_errors(filepath, filetype, strains), count)
+    def __valid_instance(item):
+        return isinstance(item, (InvalidValue, DuplicateHeading))
+    assert all(__valid_instance(error) for error in results)
diff --git a/tests/qc/test_header.py b/tests/qc/test_header.py
index f860a71..2557e85 100644
--- a/tests/qc/test_header.py
+++ b/tests/qc/test_header.py
@@ -3,42 +3,13 @@ import pytest
 from hypothesis import given
 from hypothesis import strategies as st
 
-from quality_control.headers import valid_header
 from quality_control.errors import InvalidValue, DuplicateHeading
-from quality_control.errors import DuplicateHeader, InvalidHeaderValue
 from quality_control.headers import (
     invalid_header, invalid_headings, duplicate_headings)
 
-@given(headers=st.lists(st.text(max_size=10)))
-def test_valid_header_errors_with_invalid_headers(headers):
-    "Verify that the check for header validity works"
-    with pytest.raises(InvalidHeaderValue):
-        valid_header(("BXD1", "BXD2", "BXD3"), headers)
-
-@pytest.mark.parametrize(
-    "strains,headers", [
-        (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1")),
-        (("AStrain", "AnotherStrain", "YetAnotherStrain"),
-         ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
-def test_valid_header_strains_passes_with_valid_headers(strains, headers):
-    "Verify that the check for header validity works"
-    assert valid_header(strains, headers)
-
-@pytest.mark.parametrize(
-    "strains,headers", [
-        (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1", "BXD1")),
-        (("AStrain", "AnotherStrain", "YetAnotherStrain"),
-         ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain",
-          "AStrain"))])
-def test_valid_header_fails_with_duplicate_headers(strains, headers):
-    """Check that parsing fails if any header is duplicated"""
-    with pytest.raises(DuplicateHeader):
-        valid_header(strains, headers)
-
-## ============================================================
-
 @given(headers=st.lists(st.text(max_size=10), max_size=1))
 def test_invalid_header_with_list_of_one_value(headers):
+    """Check that `invalid_header` flags a header row with fewer than 2 columns."""
     assert invalid_header(0, headers) == InvalidValue(
         0, 0, "<TAB>".join(headers),
         "The header MUST contain at least 2 columns")
@@ -51,13 +22,12 @@ def test_invalid_headings_with_invalid_inputs(headings):
         for col, heading in enumerate(headings, start=2))
 
 @pytest.mark.parametrize(
-    "strains,headers", [
-        (("BXD1", "BXD2", "BXD3"), ("ProbeSet", "BXD3", "BXD1")),
-        (("AStrain", "AnotherStrain", "YetAnotherStrain"),
-         ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
-def test_invalid_header_with_valid_headers(strains, headers):
+    "headers", [
+        (("ProbeSet", "BXD3", "BXD1")),
+        (("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"))])
+def test_invalid_header_with_valid_headers(headers):
     "Verify that the check for header validity works"
-    assert invalid_header(0, headers) == None
+    assert invalid_header(0, headers) is None
 
 @pytest.mark.parametrize(
     "strains,headings", [
@@ -76,7 +46,7 @@ def test_invalid_headings_with_valid_headings(strains, headings):
 def test_duplicate_headers_with_repeated_column_headings(headers, repeated):
     """Check that parsing fails if any header is duplicated"""
     assert duplicate_headings(0, headers) == tuple(
-        DuplicateHeading(0, head, cols, (
+        DuplicateHeading(0, cols, head, (
             f"Heading '{head}', is repeated in columns "
             f"{','.join(str(i) for i in cols)}"))
         for head, cols in repeated.items())
@@ -88,4 +58,3 @@ def test_duplicate_headers_with_repeated_column_headings(headers, repeated):
 def test_duplicate_headers_with_unique_column_headings(headers):
     """Check that parsing fails if any header is duplicated"""
     assert duplicate_headings(0, headers) == tuple()
-
diff --git a/tests/qc/test_parsing.py b/tests/qc/test_parsing.py
deleted file mode 100644
index 41739ad..0000000
--- a/tests/qc/test_parsing.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Test the parsing of the files"""
-import pytest
-
-from quality_control.errors import ParseError
-from quality_control.parsing import FileType, parse_file
-
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/average_crlf.tsv", FileType.STANDARD_ERROR),
-     ("tests/test_data/average_error_at_end_200MB.tsv",
-      FileType.STANDARD_ERROR),
-     ("tests/test_data/average.tsv", FileType.STANDARD_ERROR),
-     ("tests/test_data/standarderror_1_error_at_end.tsv", FileType.AVERAGE),
-     ("tests/test_data/standarderror.tsv", FileType.AVERAGE),
-     ("tests/test_data/duplicated_headers_no_data_errors.tsv",
-      FileType.STANDARD_ERROR),))
-def test_parse_file_fails_with_wrong_filetype_declaration(filepath, filetype, strains):
-    """Check that parsing fails if the wrong file type is declared"""
-    with pytest.raises(ParseError):
-        for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
-            pass
-
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/no_data_errors.tsv", FileType.AVERAGE),))
-def test_parse_file_passes_with_valid_files(filepath, filetype, strains):
-    """Check that parsing succeeds with valid files"""
-    for line in parse_file(filepath, filetype, strains):
-        assert bool(line)
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/average_large_no_errors.tsv", FileType.AVERAGE),
-     # ("tests/test_data/average_no_errors.tsv", FileType.AVERAGE),
-     # ("tests/test_data/standarderror_no_errors.tsv", FileType.STANDARD_ERROR),
-     ))
-def test_parse_file_works_with_large_files(filepath, filetype, strains):
-    """Check that parsing succeeds even with large files."""
-    for line in parse_file(filepath, filetype, strains):
-        assert bool(line)
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "filepath,filetype",
-    (("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE),
-     ("tests/test_data/standarderror_1_error_at_end.tsv", FileType.STANDARD_ERROR),
-     ("tests/test_data/duplicated_headers_no_data_errors.tsv", FileType.AVERAGE)))
-def test_parse_file_raises_exception_on_error_in_file(filepath, filetype, strains):
-    "Check that parsing fails if any error is found in a file"
-    with pytest.raises(ParseError):
-        for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
-            pass