diff options
-rw-r--r-- | README.org | 6 | ||||
-rw-r--r-- | r_qtl/errors.py | 1 | ||||
-rw-r--r-- | r_qtl/r_qtl2.py | 45 |
3 files changed, 24 insertions, 28 deletions
@@ -65,14 +65,14 @@ flask run *** Checks -Run tests with: +Run unit tests with: #+BEGIN_SRC shell - pytest + pytest -k unit_test #+END_SRC To run the linter over the code base, run: #+BEGIN_SRC shell - pylint *.py tests quality_control qc_app scripts + pylint *.py tests quality_control qc_app r_qtl scripts #+END_SRC To check for correct type usage in the application, run: diff --git a/r_qtl/errors.py b/r_qtl/errors.py index 648611e..20c5ced 100644 --- a/r_qtl/errors.py +++ b/r_qtl/errors.py @@ -1,5 +1,4 @@ """R/qtl and R/qtl2 error types.""" -from collections import namedtuple class RQTLError(Exception): """Base class for R/qtl and R/qtl2 errors.""" diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index ec7a954..22cf62c 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -2,16 +2,16 @@ import io import csv import json -import yaml -from pathlib import Path +from zipfile import ZipFile from functools import reduce -from zipfile import ZipFile, ZipInfo, is_zipfile -from typing import Any, List, Union, Iterator, Iterable +from typing import Iterator, Iterable, Callable -from r_qtl.errors import InvalidFormat +import yaml from quality_control.parsing import take +from r_qtl.errors import InvalidFormat + def thread_op(value, *functions): """Thread the `value` through the sequence of `functions`.""" return reduce(lambda result, func: func(result), functions, value) @@ -80,19 +80,22 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: lines = (line.strip().split(cdata.get("sep", ",")) for line in filter(lambda line: not line.startswith("#"), io.TextIOWrapper(genofile))) - id_line = next(lines) - id_key, samples = id_line[0], id_line[1:] - def __organise_by_id__(acc, item): - row = acc.get(item[id_key], {}) - return {**acc, item[id_key]: {**row, **item}} - for _key, row in reduce(# type: ignore[var-annotated] - __organise_by_id__, - (row - for batch in __n_batch__(lines, 300) - for line in batch - for row in __merge__(id_key, samples, line)), - {}).items(): - yield row + try: + id_line = next(lines) + id_key, samples = id_line[0], id_line[1:] + def __organise_by_id__(acc, item): + row = acc.get(item[id_key], {}) + return {**acc, item[id_key]: {**row, **item}} + for _key, row in reduce(# type: ignore[var-annotated] + __organise_by_id__, + (row + for batch in __n_batch__(lines, 300) + for line in batch + for row in __merge__(id_key, samples, line)), + {}).items(): + yield row + except StopIteration: + return None def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> tuple[dict, ...]: """Read gmap files to get the genome mapping data""" @@ -125,9 +128,3 @@ def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> tuple[dict, ...]: lambda gmap, row: gmap + (dict(zip(headers, row)),), zip(*(line[1:] for line in lines)), tuple()) - -def read_r_qtl2_files(filepath: Path): - """Read R/qtl2 format zip files.""" - with ZipFile(filepath, "r") as zfile: - cf = control_data(zfile) - raise NotImplementedError("Implementation is incomplete.") |