aboutsummaryrefslogtreecommitdiff
path: root/tests/qc/test_header.py
blob: 2557e85d0aa38513a8542fc2f0f58cd356360db7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Test the parsing of headers"""
import pytest
from hypothesis import given
from hypothesis import strategies as st

from quality_control.errors import InvalidValue, DuplicateHeading
from quality_control.headers import (
    invalid_header, invalid_headings, duplicate_headings)

@given(headers=st.lists(st.text(max_size=10), max_size=1))
def test_invalid_header_with_list_of_one_value(headers):
    """Check `invalid_header` against header rows with at most one column.

    The strategy only produces lists holding zero or one heading, and each
    such row is expected to be reported as an `InvalidValue` whose value is
    the headings joined with "<TAB>".
    """
    line_number = 0
    expected = InvalidValue(
        line_number,
        0,
        "<TAB>".join(headers),
        "The header MUST contain at least 2 columns",
    )
    assert invalid_header(line_number, headers) == expected

@given(headings=st.lists(
    # Randomly generated text may coincide with one of the strain names used
    # below; such a heading would not be expected to raise an error, so it is
    # excluded to keep the expectation (an error for every heading) sound.
    st.text(min_size=2, max_size=10).filter(
        lambda heading: heading not in ("BXD1", "BXD2", "BXD3")),
    min_size=2))
def test_invalid_headings_with_invalid_inputs(headings):
    """Verify that `invalid_headings` reports every heading that is no strain.

    Each generated heading differs from all the strains passed in, so one
    `InvalidValue` is expected per heading. The expected column numbers start
    at 2, in the order the headings were given.
    """
    strains = ("BXD1", "BXD2", "BXD3")
    assert invalid_headings(0, strains, headings) == tuple(
        InvalidValue(0, col, heading, f"'{heading}' not a valid strain.")
        for col, heading in enumerate(headings, start=2))

@pytest.mark.parametrize(
    "headers",
    [
        ("ProbeSet", "BXD3", "BXD1"),
        ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"),
    ])
def test_invalid_header_with_valid_headers(headers):
    """Check that `invalid_header` returns `None` for these header rows."""
    outcome = invalid_header(0, headers)
    assert outcome is None

@pytest.mark.parametrize(
    "strains,headings",
    [
        (("BXD1", "BXD2", "BXD3"), ("BXD3", "BXD1")),
        (
            ("AStrain", "AnotherStrain", "YetAnotherStrain"),
            ("AStrain", "AnotherStrain", "YetAnotherStrain"),
        ),
    ])
def test_invalid_headings_with_valid_headings(strains, headings):
    """Check that `invalid_headings` finds no errors in these headings.

    In every case each heading is one of the given strains, and an empty
    tuple is expected back.
    """
    errors = invalid_headings(0, strains, headings)
    assert errors == ()

@pytest.mark.parametrize(
    "headers,repeated",
    [
        (("ProbeSet", "BXD3", "BXD1", "BXD1"), {"BXD1": (3, 4)}),
        (
            ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain",
             "AStrain"),
            {"AStrain": (2, 5)},
        ),
    ])
def test_duplicate_headers_with_repeated_column_headings(headers, repeated):
    """Check that `duplicate_headings` reports each repeated heading.

    `repeated` maps every duplicated heading to the column numbers that the
    corresponding `DuplicateHeading` is expected to carry.
    """
    expected = []
    for heading, columns in repeated.items():
        column_list = ",".join(map(str, columns))
        expected.append(DuplicateHeading(
            0, columns, heading,
            f"Heading '{heading}', is repeated in columns {column_list}"))
    assert duplicate_headings(0, headers) == tuple(expected)

@pytest.mark.parametrize(
    "headers",
    [
        ("ProbeSet", "BXD3", "BXD1"),
        ("Individual", "AStrain", "AnotherStrain", "YetAnotherStrain"),
    ])
def test_duplicate_headers_with_unique_column_headings(headers):
    """Check that `duplicate_headings` reports nothing for unique headings."""
    found = duplicate_headings(0, headers)
    assert found == ()