diff options
Diffstat (limited to 'quality_control')
-rw-r--r-- | quality_control/parsing.py | 18 |
1 files changed, 11 insertions, 7 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py index b7b0ff5..8b2715a 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -1,6 +1,6 @@ import csv - from enum import Enum +from functools import reduce import quality_control.average as avg import quality_control.standard_error as se @@ -28,7 +28,7 @@ def parse_strains(filepath): def __parse_header(line, strains): return valid_header( - strains, + set(strains), tuple(header.strip() for header in line.split("\t"))) def __parse_average_line(line): @@ -42,16 +42,20 @@ LINE_PARSERS = { FileType.STANDARD_ERROR: __parse_standard_error_line } -def parse_file(filepath: str, filetype: FileType, strains_filepath: str): +def strain_names(strains): + def __extract_strain_names(acc, strain): + return acc + tuple( + item for item in (strain["Name"], strain["Name2"]) + if (item is not None and item != "")) + return reduce(__extract_strain_names, strains, tuple()) + +def parse_file(filepath: str, filetype: FileType, strains: list): seek_pos = 0 try: with open(filepath, encoding="utf-8") as input_file: for line_number, line in enumerate(input_file): if line_number == 0: - yield __parse_header( - line, - tuple(strain["Name"] for strain - in parse_strains(strains_filepath))) + yield __parse_header(line, strains) seek_pos = seek_pos + len(line) yield LINE_PARSERS[filetype]( |