aboutsummaryrefslogtreecommitdiff
path: root/quality_control/headers.py
blob: b7bc01e159f47e63aae8d23ea2c0cbef20d74908 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
"""Validate the headers"""

from collections import Counter

from quality_control.errors import DuplicateHeader, InvalidHeaderValue

def valid_header(strains, headers):
    """Return `headers` unchanged if they are valid with respect to `strains`.

    The first column is not checked against `strains`; every other column
    must be a member of `strains`, and no column (including the first) may
    appear more than once.

    Raises:
        InvalidHeaderValue: if `headers` has fewer than 2 columns, or if any
            column after the first is not in `strains`.  In the latter case
            the exception receives one message per offending column.
        DuplicateHeader: if any header occurs more than once.  The exception
            receives one message per repeated header, ordered by the
            header's first appearance in the row.
    """
    if len(headers) < 2:
        raise InvalidHeaderValue(
            "The header MUST contain at least 2 columns")

    invalid_headers = tuple(
        header for header in headers[1:] if header not in strains)
    if invalid_headers:
        raise InvalidHeaderValue(
            *(f"'{header}' not a valid strain." for header in invalid_headers))

    # Count every header in a single pass.  Counter keeps first-seen order,
    # so the messages come out in a stable, row-based order instead of the
    # arbitrary order produced by iterating over a set.
    repeated = tuple(
        (header, times)
        for header, times in Counter(headers).items() if times > 1)
    if repeated:
        raise DuplicateHeader(*(
            f"'{header}' is present in the header row {times} times."
            for header, times in repeated))

    return headers