diff options
Diffstat (limited to 'quality_control/parsing.py')
-rw-r--r-- | quality_control/parsing.py | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 8b2715a..6e5bb8f 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -1,3 +1,5 @@
+"""Module handling the high-level parsing of the files"""
+
 import csv
 from enum import Enum
 from functools import reduce
@@ -9,11 +11,13 @@ from quality_control.errors import (
     ParseError, DuplicateHeader, InvalidCellValue, InvalidHeaderValue)
 
 class FileType(Enum):
+    """Enumerate the expected file types"""
     AVERAGE = 1
     STANDARD_ERROR = 2
 
 def parse_strains(filepath):
-    with open(filepath) as strains_file:
+    """Parse the strains file"""
+    with open(filepath, encoding="utf8") as strains_file:
         reader = csv.DictReader(
             strains_file,
             fieldnames=[
@@ -43,6 +47,7 @@ LINE_PARSERS = {
 }
 
 def strain_names(strains):
+    """Retrieve a complete list of the names of the strains"""
     def __extract_strain_names(acc, strain):
         return acc + tuple(
             item for item in (strain["Name"], strain["Name2"])
@@ -50,6 +55,7 @@ def strain_names(strains):
     return reduce(__extract_strain_names, strains, tuple())
 
 def parse_file(filepath: str, filetype: FileType, strains: list):
+    """Parse the given file"""
     seek_pos = 0
     try:
         with open(filepath, encoding="utf-8") as input_file:
@@ -66,5 +72,6 @@ def parse_file(filepath: str, filetype: FileType, strains: list):
                 "filepath": filepath,
                 "filetype": filetype,
                 "position": seek_pos,
-                "line_number": line_number
-            })
+                "line_number": line_number,
+                "error": err
+            }) from err