about summary refs log tree commit diff
path: root/quality_control
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control')
-rw-r--r-- quality_control/parsing.py 18
1 files changed, 11 insertions, 7 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index b7b0ff5..8b2715a 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -1,6 +1,6 @@
 import csv
-
 from enum import Enum
+from functools import reduce
 
 import quality_control.average as avg
 import quality_control.standard_error as se
@@ -28,7 +28,7 @@ def parse_strains(filepath):
 
 def __parse_header(line, strains):
     return valid_header(
-        strains,
+        set(strains),
         tuple(header.strip() for header in line.split("\t")))
 
 def __parse_average_line(line):
@@ -42,16 +42,20 @@ LINE_PARSERS = {
     FileType.STANDARD_ERROR: __parse_standard_error_line
 }
 
-def parse_file(filepath: str, filetype: FileType, strains_filepath: str):
+def strain_names(strains):
+    def __extract_strain_names(acc, strain):
+        return acc + tuple(
+            item for item in (strain["Name"], strain["Name2"])
+            if (item is not None and item != ""))
+    return reduce(__extract_strain_names, strains, tuple())
+
+def parse_file(filepath: str, filetype: FileType, strains: list):
     seek_pos = 0
     try:
         with open(filepath, encoding="utf-8") as input_file:
             for line_number, line in enumerate(input_file):
                 if line_number == 0:
-                    yield __parse_header(
-                        line,
-                        tuple(strain["Name"] for strain
-                              in parse_strains(strains_filepath)))
+                    yield __parse_header(line, strains)
                     seek_pos = seek_pos + len(line)
 
                 yield LINE_PARSERS[filetype](