about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- quality_control/average.py 3
-rw-r--r-- quality_control/errors.py 5
-rw-r--r-- quality_control/parsing.py 13
-rw-r--r-- quality_control/standard_error.py 3
-rw-r--r-- tests/conftest.py 3
-rw-r--r-- tests/qc/test_cells.py 27
-rw-r--r-- tests/qc/test_header.py 2
-rw-r--r-- tests/qc/test_parsing.py 9
-rw-r--r-- tests/strategies.py 8
9 files changed, 48 insertions, 25 deletions
diff --git a/quality_control/average.py b/quality_control/average.py
index c552ba3..3261e1c 100644
--- a/quality_control/average.py
+++ b/quality_control/average.py
@@ -4,7 +4,8 @@ import re
from .errors import InvalidCellValue
def valid_value(val):
- if re.search("^[0-9]+\.[0-9]{3}$", val):
+ """Checks whether `val` is a valid value for averages"""
+ if re.search(r"^[0-9]+\.[0-9]{3}$", val):
return float(val)
raise InvalidCellValue(
f"Invalid value '{val}'.\n"
diff --git a/quality_control/errors.py b/quality_control/errors.py
index 993748c..0802159 100644
--- a/quality_control/errors.py
+++ b/quality_control/errors.py
@@ -19,5 +19,6 @@ class DuplicateHeader(Exception):
super().__init__(self, *args)
class ParseError(Exception):
- def __init(self, *args):
- super().__init__(*args)
+ """Raised if any of the above exceptions are raised"""
+ def __init__(self, *args):
+ super().__init__(self, *args)
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 8b2715a..6e5bb8f 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -1,3 +1,5 @@
+"""Module handling the high-level parsing of the files"""
+
import csv
from enum import Enum
from functools import reduce
@@ -9,11 +11,13 @@ from quality_control.errors import (
ParseError, DuplicateHeader, InvalidCellValue, InvalidHeaderValue)
class FileType(Enum):
+ """Enumerate the expected file types"""
AVERAGE = 1
STANDARD_ERROR = 2
def parse_strains(filepath):
- with open(filepath) as strains_file:
+ """Parse the strains file"""
+ with open(filepath, encoding="utf8") as strains_file:
reader = csv.DictReader(
strains_file,
fieldnames=[
@@ -43,6 +47,7 @@ LINE_PARSERS = {
}
def strain_names(strains):
+ """Retrieve a complete list of the names of the strains"""
def __extract_strain_names(acc, strain):
return acc + tuple(
item for item in (strain["Name"], strain["Name2"])
@@ -50,6 +55,7 @@ def strain_names(strains):
return reduce(__extract_strain_names, strains, tuple())
def parse_file(filepath: str, filetype: FileType, strains: list):
+ """Parse the given file"""
seek_pos = 0
try:
with open(filepath, encoding="utf-8") as input_file:
@@ -66,5 +72,6 @@ def parse_file(filepath: str, filetype: FileType, strains: list):
"filepath": filepath,
"filetype": filetype,
"position": seek_pos,
- "line_number": line_number
- })
+ "line_number": line_number,
+ "error": err
+ }) from err
diff --git a/quality_control/standard_error.py b/quality_control/standard_error.py
index 7b49913..805c30e 100644
--- a/quality_control/standard_error.py
+++ b/quality_control/standard_error.py
@@ -4,7 +4,8 @@ import re
from .errors import InvalidCellValue
def valid_value(val):
- if re.search("^[0-9]+\.[0-9]{6,}$", val):
+ """Checks whether `val` is a valid value for standard errors"""
+ if re.search(r"^[0-9]+\.[0-9]{6,}$", val):
return float(val)
raise InvalidCellValue(
f"Invalid value '{val}'.\n"
diff --git a/tests/conftest.py b/tests/conftest.py
index 0cdba3e..f79166d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,4 @@
-from functools import reduce
+"""Set up fixtures for tests"""
import pytest
@@ -6,4 +6,5 @@ from quality_control.parsing import strain_names, parse_strains
@pytest.fixture(scope="session")
def strains():
+ """Parse the strains once every test session"""
return strain_names(parse_strains("strains.csv"))
diff --git a/tests/qc/test_cells.py b/tests/qc/test_cells.py
index 5e25a9a..d4ef911 100644
--- a/tests/qc/test_cells.py
+++ b/tests/qc/test_cells.py
@@ -8,33 +8,46 @@ from quality_control.errors import InvalidCellValue
from quality_control.average import valid_value as avg_valid_value
from quality_control.standard_error import valid_value as se_valid_value
-@given(num_str=st.from_regex("^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
+@given(num_str=st.from_regex(
+ r"^(?!([0-9]+\.([0-9]{3}|[0-9]{6,}))).*", fullmatch=True))
def test_cell_value_errors_with_invalid_inputs(num_str):
+ """Check that an error is raised for a cell with an invalid value."""
with pytest.raises(InvalidCellValue):
avg_valid_value(num_str)
with pytest.raises(InvalidCellValue):
se_valid_value(num_str)
-@given(num_str=st.from_regex("^[0-9]+\.([0-9]{1,2}|[0-9]{4,}$)", fullmatch=True))
+@given(num_str=st.from_regex(
+ r"^[0-9]+\.([0-9]{1,2}|[0-9]{4,}$)", fullmatch=True))
def test_cell_average_value_errors_if_not_three_decimal_places(num_str):
+ """Check that an error is raised if the average value does not have 3 decimal places"""
with pytest.raises(InvalidCellValue):
avg_valid_value(num_str)
-@given(num_str=st.from_regex("^[0-9]+\.[0-9]{3}$", fullmatch=True))
+@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{3}$", fullmatch=True))
def test_cell_average_value_pass_if_three_decimal_places(num_str):
+ """Check that there is no error if the average value has 3 decimal places."""
processed = avg_valid_value(num_str)
assert (
isinstance(processed, float) and
- processed == float(num_str))
+ processed == float(num_str))
-@given(num_str=st.from_regex("^[0-9]+\.([0-9]{0,5}$)", fullmatch=True))
+@given(num_str=st.from_regex(r"^[0-9]+\.([0-9]{0,5}$)", fullmatch=True))
def test_cell_standard_error_value_errors_if_less_than_six_decimal_places(num_str):
+ """
+ Check that an error is raised if the standard error value has fewer than
+ 6 decimal places
+ """
with pytest.raises(InvalidCellValue):
se_valid_value(num_str)
-@given(num_str=st.from_regex("^[0-9]+\.[0-9]{6,}$", fullmatch=True))
+@given(num_str=st.from_regex(r"^[0-9]+\.[0-9]{6,}$", fullmatch=True))
def test_cell_standard_error_value_pass_if_six_or_more_decimal_places(num_str):
+ """
+ Check that there is no error if the standard error value has 6 or more
+ decimal places.
+ """
processed = se_valid_value(num_str)
assert (
isinstance(processed, float) and
- processed == float(num_str))
+ processed == float(num_str))
diff --git a/tests/qc/test_header.py b/tests/qc/test_header.py
index a474834..6ca9376 100644
--- a/tests/qc/test_header.py
+++ b/tests/qc/test_header.py
@@ -1,3 +1,4 @@
+"""Test the parsing of headers"""
import pytest
from hypothesis import given
from hypothesis import strategies as st
@@ -27,5 +28,6 @@ def test_valid_header_strains_passes_with_valid_headers(strains, headers):
("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain",
"AStrain"))])
def test_valid_header_fails_with_duplicate_headers(strains, headers):
+ """Check that parsing fails if any header is duplicated"""
with pytest.raises(DuplicateHeader):
valid_header(strains, headers)
diff --git a/tests/qc/test_parsing.py b/tests/qc/test_parsing.py
index be13d9b..6c784d5 100644
--- a/tests/qc/test_parsing.py
+++ b/tests/qc/test_parsing.py
@@ -1,3 +1,4 @@
+"""Test the parsing of the files"""
import pytest
from quality_control.errors import ParseError
@@ -14,8 +15,9 @@ from quality_control.parsing import FileType, parse_file
("tests/test_data/duplicated_headers_no_data_errors.tsv",
FileType.STANDARD_ERROR),))
def test_parse_file_fails_with_wrong_filetype_declaration(filepath, filetype, strains):
+ """Check that parsing fails if the wrong file type is declared"""
with pytest.raises(ParseError):
- for line in parse_file(filepath, filetype, strains):
+ for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
pass
@pytest.mark.parametrize(
@@ -24,6 +26,7 @@ def test_parse_file_fails_with_wrong_filetype_declaration(filepath, filetype, st
("tests/test_data/average.tsv", FileType.AVERAGE),
("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR)))
def test_parse_file_passes_with_valid_files(filepath, filetype, strains):
+ """Check that parsing succeeds with valid files"""
for line in parse_file(filepath, filetype, strains):
assert bool(line)
@@ -33,6 +36,7 @@ def test_parse_file_passes_with_valid_files(filepath, filetype, strains):
("tests/test_data/average.tsv", FileType.AVERAGE),
("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR)))
def test_parse_file_works_with_large_files(filepath, filetype, strains):
+ """Check that parsing succeeds even with large files."""
for line in parse_file(filepath, filetype, strains):
assert bool(line)
@@ -43,6 +47,7 @@ def test_parse_file_works_with_large_files(filepath, filetype, strains):
("tests/test_data/standarderror_1_error_at_end.tsv", FileType.STANDARD_ERROR),
("tests/test_data/duplicated_headers_no_data_errors.tsv", FileType.AVERAGE)))
def test_parse_file_raises_exception_on_error_in_file(filepath, filetype, strains):
+ "Check that parsing fails if any error is found in a file"
with pytest.raises(ParseError):
- for line in parse_file(filepath, filetype, strains):
+ for line in parse_file(filepath, filetype, strains): # pylint: disable=[unused-variable]
pass
diff --git a/tests/strategies.py b/tests/strategies.py
deleted file mode 100644
index ccda362..0000000
--- a/tests/strategies.py
+++ /dev/null
@@ -1,8 +0,0 @@
-"""Module holding custom data generation strategies"""
-
-from hypothesis.strategies import characters, composite
-
-@composite
-def average_numbers_string(draw):
- num = draw(floats(allow_nan=False, allow_infinity=False,))
- return f"{num:.3f}"