aboutsummaryrefslogtreecommitdiff
path: root/tests/qc
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2022-04-21 10:24:29 +0300
committer: Frederick Muriuki Muriithi, 2022-04-21 10:24:29 +0300
commit: d70dbd0addb861aa37c2f2574a537319a75411c7 (patch)
tree: fc29c01279a5a1d6485e9dcf44c6d169dddfc73a /tests/qc
parent: 7b3dc9d36de1db28a6f36b03de85cf7f527231cc (diff)
download: gn-uploader-d70dbd0addb861aa37c2f2574a537319a75411c7.tar.gz
Collect all the errors
Build a function to collect all the parsing errors into a "sequence" of dict objects containing the issues found.
Diffstat (limited to 'tests/qc')
-rw-r--r-- tests/qc/test_error_collection.py | 30
1 files changed, 30 insertions, 0 deletions
diff --git a/tests/qc/test_error_collection.py b/tests/qc/test_error_collection.py
new file mode 100644
index 0000000..c45803a
--- /dev/null
+++ b/tests/qc/test_error_collection.py
@@ -0,0 +1,30 @@
+import pytest
+
+from quality_control.parsing import FileType, parse_errors
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+ "filepath,filetype,seek_pos",
+ (("tests/test_data/average_crlf.tsv", FileType.AVERAGE, 0),
+ ("tests/test_data/average_error_at_end_200MB.tsv", FileType.AVERAGE,
+ 205500004 # Skip first 500K lines
+ ),
+ ("tests/test_data/average.tsv", FileType.AVERAGE, 0),
+ ("tests/test_data/standarderror_1_error_at_end.tsv",
+ FileType.STANDARD_ERROR, 0),
+ ("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR, 0),
+ ("tests/test_data/duplicated_headers_no_data_errors.tsv",
+ FileType.AVERAGE),
+ ))
+def test_parse_errors(filepath, filetype, strains, seek_pos):
+ """
+ Check that only errors are returned, and that certain properties hold for
+ said errors.
+ """
+ for error in parse_errors(filepath, filetype, strains, seek_pos):
+ assert isinstance(error, dict)
+ assert "filepath" in error
+ assert "filetype" in error
+ assert "position" in error
+ assert "error" in error and isinstance(error["error"], str)
+ assert "message" in error