"""Validate the headers""" from functools import reduce from typing import Union, Tuple, Sequence from quality_control.errors import InvalidValue, DuplicateHeading def invalid_header(filename: str, line_number: int, headers: Sequence[str]) -> Union[InvalidValue, None]: """Return an `InvalidValue` object if the header row has less than 2 items.""" if len(headers) < 2: return InvalidValue( filename, line_number, 0, "".join(headers), "The header MUST contain at least 2 columns") return None def invalid_headings( filename: str, line_number: int, strains: Sequence[str], headings: Sequence[str]) -> Union[Tuple[InvalidValue, ...], None]: """Return tuple of `InvalidValue` objects for each error found for every column heading.""" return tuple( InvalidValue(filename, line_number, col, header, f"'{header}' not a valid strain.") for col, header in enumerate(headings, start=2) if header not in strains) def duplicate_headings(filename: str, line_number: int, headers: Sequence[str]) -> Tuple[DuplicateHeading, ...]: """Return a tuple of `DuplicateHeading` objects for each column heading that is a duplicate of another column heading.""" def __update_columns__(acc, item): if item[1] in acc.keys(): return {**acc, item[1]: acc[item[1]] + (item[0],)} return {**acc, item[1]: (item[0],)} repeated = {# type: ignore[var-annotated] heading: columns for heading, columns in reduce(__update_columns__, enumerate(headers, start=1), {}).items() if len(columns) > 1 } return tuple( DuplicateHeading( filename, line_number, columns, heading, ( f"Heading '{heading}', is repeated in columns " f"{','.join(str(i) for i in columns)}")) for heading, columns in repeated.items())