import pytest

from quality_control.parsing import take, FileType, parse_errors


@pytest.mark.slow
@pytest.mark.parametrize(
    "filepath,filetype,seek_pos",
    (
        ("tests/test_data/average_crlf.tsv", FileType.AVERAGE, 0),
        (
            "tests/test_data/average_error_at_end_200MB.tsv",
            FileType.AVERAGE,
            205500004,  # Skip first 500K lines
        ),
        ("tests/test_data/average.tsv", FileType.AVERAGE, 0),
        (
            "tests/test_data/standarderror_1_error_at_end.tsv",
            FileType.STANDARD_ERROR,
            0,
        ),
        ("tests/test_data/standarderror.tsv", FileType.STANDARD_ERROR, 0),
        (
            "tests/test_data/duplicated_headers_no_data_errors.tsv",
            FileType.AVERAGE,
            0,
        ),
    ),
)
def test_parse_errors(filepath, filetype, strains, seek_pos):
    """
    Verify the shape of every item produced by `parse_errors`.

    Each item must be a dict that carries the "filepath", "filetype",
    "position", "error" and "message" keys, with a string under "error".

    `strains` is not part of the parametrization, so it is presumably a
    pytest fixture supplied elsewhere (e.g. a conftest) -- TODO confirm.
    """
    required_keys = ("filepath", "filetype", "position", "error", "message")
    for item in parse_errors(filepath, filetype, strains, seek_pos):
        assert isinstance(item, dict)
        for key in required_keys:
            assert key in item
        assert isinstance(item["error"], str)


@pytest.mark.parametrize(
    "sample,num,expected",
    (
        (range(0, 25), 5, [0, 1, 2, 3, 4]),
        ([0, 1, 2, 3], 200, [0, 1, 2, 3]),
        (("he", "is", "a", "lovely", "boy"), 3, ["he", "is", "a"]),
    ),
)
def test_take(sample, num, expected):
    """
    Verify that `take(sample, num)` returns no more than `num` items and
    that the result equals `expected`.
    """
    result = take(sample, num)
    assert not len(result) > num
    assert result == expected