about summary refs log tree commit diff
path: root/quality_control/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control/parsing.py')
-rw-r--r-- quality_control/parsing.py 13
1 files changed, 10 insertions, 3 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 8b2715a..6e5bb8f 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -1,3 +1,5 @@
+"""Module handling the high-level parsing of the files"""
+
import csv
from enum import Enum
from functools import reduce
@@ -9,11 +11,13 @@ from quality_control.errors import (
ParseError, DuplicateHeader, InvalidCellValue, InvalidHeaderValue)
class FileType(Enum):
+ """Enumerate the expected file types"""
AVERAGE = 1
STANDARD_ERROR = 2
def parse_strains(filepath):
- with open(filepath) as strains_file:
+ """Parse the strains file"""
+ with open(filepath, encoding="utf8") as strains_file:
reader = csv.DictReader(
strains_file,
fieldnames=[
@@ -43,6 +47,7 @@ LINE_PARSERS = {
}
def strain_names(strains):
+ """Retrieve a complete list of the names of the strains"""
def __extract_strain_names(acc, strain):
return acc + tuple(
item for item in (strain["Name"], strain["Name2"])
@@ -50,6 +55,7 @@ def strain_names(strains):
return reduce(__extract_strain_names, strains, tuple())
def parse_file(filepath: str, filetype: FileType, strains: list):
+ """Parse the given file"""
seek_pos = 0
try:
with open(filepath, encoding="utf-8") as input_file:
@@ -66,5 +72,6 @@ def parse_file(filepath: str, filetype: FileType, strains: list):
"filepath": filepath,
"filetype": filetype,
"position": seek_pos,
- "line_number": line_number
- })
+ "line_number": line_number,
+ "error": err
+ }) from err