about summary refs log tree commit diff
path: root/quality_control/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control/parsing.py')
-rw-r--r-- quality_control/parsing.py 13
1 files changed, 10 insertions, 3 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 8b2715a..6e5bb8f 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -1,3 +1,5 @@
+"""Module handling the high-level parsing of the files"""
+
 import csv
 from enum import Enum
 from functools import reduce
@@ -9,11 +11,13 @@ from quality_control.errors import (
     ParseError, DuplicateHeader, InvalidCellValue, InvalidHeaderValue)
 
 class FileType(Enum):
+    """Enumerate the expected file types"""
     AVERAGE = 1
     STANDARD_ERROR = 2
 
 def parse_strains(filepath):
-    with open(filepath) as strains_file:
+    """Parse the strains file"""
+    with open(filepath, encoding="utf8") as strains_file:
         reader = csv.DictReader(
             strains_file,
             fieldnames=[
@@ -43,6 +47,7 @@ LINE_PARSERS = {
 }
 
 def strain_names(strains):
+    """Retrieve a complete list of the names of the strains"""
     def __extract_strain_names(acc, strain):
         return acc + tuple(
             item for item in (strain["Name"], strain["Name2"])
@@ -50,6 +55,7 @@ def strain_names(strains):
     return reduce(__extract_strain_names, strains, tuple())
 
 def parse_file(filepath: str, filetype: FileType, strains: list):
+    """Parse the given file"""
     seek_pos = 0
     try:
         with open(filepath, encoding="utf-8") as input_file:
@@ -66,5 +72,6 @@ def parse_file(filepath: str, filetype: FileType, strains: list):
             "filepath": filepath,
             "filetype": filetype,
             "position": seek_pos,
-            "line_number": line_number
-        })
+            "line_number": line_number,
+            "error": err
+        }) from err