about | summary | refs | log | tree | commit | diff
path: root/quality_control/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control/parsing.py')
-rw-r--r-- quality_control/parsing.py 27
1 files changed, 12 insertions, 15 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index c545937..f7a664f 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -4,6 +4,9 @@ from enum import Enum
from functools import partial
from typing import Tuple, Union, Generator, Callable, Optional
+import MySQLdb as mdb
+from MySQLdb.cursors import DictCursor
+
import quality_control.average as avg
from quality_control.file_utils import open_file
import quality_control.standard_error as se
@@ -17,21 +20,15 @@ class FileType(Enum):
AVERAGE = 1
STANDARD_ERROR = 2
-def strain_names(filepath):
- """Retrieve the strains names from given file"""
- strains = set()
- with open(filepath, encoding="utf8") as strains_file:
- for idx, line in enumerate(strains_file.readlines()):
- if idx > 0:
- parts = line.split()
- for name in (parts[1], parts[2]):
- strains.add(name.strip())
- if len(parts) >= 6:
- alias = parts[5].strip()
- if alias != "" and alias not in ("P", "\\N"):
- strains.add(alias)
-
- return strains
+def strain_names(dbconn: mdb.Connection, speciesid: int) -> tuple[str, ...]:
+ """Retrieve samples/cases from database."""
+ with dbconn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute("SELECT * FROM Strain WHERE SpeciesId=%s",
+ (speciesid,))
+ samplenames = ((row["Name"], row["Name2"]) for row in cursor.fetchall())
+ return tuple(set(filter(
+ lambda item: bool(item.strip() if item is not None else item),
+ (name for names in samplenames for name in names))))
def header_errors(line_number, fields, strains):
"""Gather all header row errors."""