about summary refs log tree commit diff
path: root/r_qtl
diff options
context:
space:
mode:
Diffstat (limited to 'r_qtl')
-rw-r--r-- r_qtl/r_qtl2.py 82
-rw-r--r-- r_qtl/r_qtl2_qc.py 2
2 files changed, 66 insertions, 18 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 9da4081..0ef487f 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -16,7 +16,11 @@ from r_qtl.exceptions import InvalidFormat, MissingFileException
 
 FILE_TYPES = (
     "geno", "founder_geno", "pheno", "covar", "phenocovar", "gmap", "pmap",
-    "phenose")
+    "phenose", "phenonum")
+
+__CONTROL_FILE_ERROR_MESSAGE__ = (
+    "The zipped bundle that was provided does not contain a valid control file "
+    "in either JSON or YAML format.")
 
 
 def __special_file__(filename):
@@ -72,6 +76,8 @@ def transpose_csv(
     def __read_by_line__(_path):
         with open(_path, "r", encoding="utf8") as infile:
             for line in infile:
+                if line.startswith("#"):
+                    continue
                 yield line
 
     transposed_data= (f"{linejoinerfn(items)}\n" for items in zip(*(
@@ -112,7 +118,7 @@ def __control_data_from_zipfile__(zfile: ZipFile) -> dict:
                            or filename.endswith(".json"))))
     num_files = len(files)
     if num_files == 0:
-        raise InvalidFormat("Expected a json or yaml control file.")
+        raise InvalidFormat(__CONTROL_FILE_ERROR_MESSAGE__)
 
     if num_files > 1:
         raise InvalidFormat("Found more than one possible control file.")
@@ -129,7 +135,6 @@ def __control_data_from_zipfile__(zfile: ZipFile) -> dict:
             else yaml.safe_load(zfile.read(files[0])))
     }
 
-
 def __control_data_from_dirpath__(dirpath: Path):
     """Load control data from a given directory path."""
     files = tuple(path for path in dirpath.iterdir()
@@ -137,7 +142,7 @@ def __control_data_from_dirpath__(dirpath: Path):
                       and (path.suffix in (".yaml", ".json"))))
     num_files = len(files)
     if num_files == 0:
-        raise InvalidFormat("Expected a json or yaml control file.")
+        raise InvalidFormat(__CONTROL_FILE_ERROR_MESSAGE__)
 
     if num_files > 1:
         raise InvalidFormat("Found more than one possible control file.")
@@ -182,7 +187,7 @@ def control_data(control_src: Union[Path, ZipFile]) -> dict:
     r_qtl.exceptions.InvalidFormat
     """
     def __cleanup__(cdata):
-        return {
+        _cdata = {
             **cdata,
             **dict((filetype,
                     ([cdata[filetype]] if isinstance(cdata[filetype], str)
@@ -190,6 +195,14 @@ def control_data(control_src: Union[Path, ZipFile]) -> dict:
                     ) for filetype in
                    (typ for typ in cdata.keys() if typ in FILE_TYPES))
         }
+        if "na.string" in _cdata:# handle common error in file.
+            _cdata = {
+                **cdata,
+                "na.strings": list(set(
+                    _cdata["na.string"] + _cdata["na.strings"]))
+            }
+
+        return _cdata
 
     if isinstance(control_src, ZipFile):
         return __cleanup__(__control_data_from_zipfile__(control_src))
@@ -200,8 +213,8 @@ def control_data(control_src: Union[Path, ZipFile]) -> dict:
         if control_src.is_dir():
             return __cleanup__(__control_data_from_dirpath__(control_src))
     raise InvalidFormat(
-        "Expects either a zipfile.ZipFile object or a pathlib.Path object "
-        "pointing to a directory containing the R/qtl2 bundle.")
+        "Expects either a zipped bundle of files or a path-like object "
+        "pointing to the zipped R/qtl2 bundle.")
 
 
 def replace_na_strings(cdata, val):
@@ -398,22 +411,19 @@ def file_data(zfile: ZipFile,
 
     try:
         if isinstance(cdata[member_key], list):
-            for row in (line for lines in
+            yield from (line for lines in
                         (file_data(
                             zfile, member_key, {**cdata, member_key: innerfile},
                             process_value, process_transposed_value)
                          for innerfile in cdata[member_key])
-                        for line in lines):
-                yield row
+                        for line in lines)
             return
         if not cdata.get(f"{member_key}_transposed", False):
-            for row in with_non_transposed(zfile, member_key, cdata, process_value):
-                yield row
+            yield from with_non_transposed(zfile, member_key, cdata, process_value)
             return
 
-        for row in with_transposed(
-                zfile, member_key, cdata, process_transposed_value):
-            yield row
+        yield from with_transposed(
+            zfile, member_key, cdata, process_transposed_value)
     except KeyError as exc:
         raise MissingFileException(*exc.args) from exc
 
@@ -464,8 +474,7 @@ def raw_file_data(zipfilepath: Union[str, Path],
     with (ZipFile(str(zipfilepath), "r") as zfile,
           zfile.open(memberfilename) as innerfile):
         wrappedfile = io.TextIOWrapper(innerfile)
-        for  line in wrappedfile:
-            yield line
+        yield from wrappedfile
 
 def strip_comments(rawdata: Iterator[str], commentchar) -> Iterator[str]:
     """Remove comments from raw text."""
@@ -549,3 +558,42 @@ def load_samples(zipfilepath: Union[str, Path],
             pass
 
     return tuple(samples)
+
+
+
+def read_text_file(filepath: Union[str, Path]) -> Iterator[str]:
+    """Read the raw text from a text file."""
+    with open(filepath, "r", encoding="utf8") as _file:
+        yield from _file
+
+
+def read_csv_file(filepath: Union[str, Path],
+                  separator: str = ",",
+                  comment_char: str = "#") -> Iterator[tuple[str, ...]]:
+    """Read a file as a csv file. This does not process the N/A values."""
+    for line in read_text_file(filepath):
+        if line.startswith(comment_char):
+            continue
+        yield tuple(field.strip() for field in line.split(separator))
+
+
+def read_csv_file_headers(
+        filepath: Union[str, Path],
+        transposed: bool,
+        separator: str = ",",
+        comment_char: str = "#"
+) -> tuple[str, ...]:
+    """Read the 'true' headers of a CSV file."""
+    headers = tuple()
+    for line in read_text_file(filepath):
+        if line.startswith(comment_char):
+            continue
+
+        line = tuple(field.strip() for field in line.split(separator))
+        if not transposed:
+            return line
+
+        headers = headers + (line[0],)
+        continue
+
+    return headers
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
index 7b26b50..2d9e9a8 100644
--- a/r_qtl/r_qtl2_qc.py
+++ b/r_qtl/r_qtl2_qc.py
@@ -95,7 +95,7 @@ def missing_files(bundlesrc: Union[Path, ZipFile]) -> tuple[tuple[str, str], ...
         "pointing to a directory containing the R/qtl2 bundle.")
 
 
-def validate_bundle(zfile: ZipFile):
+def validate_bundle(zfile: Union[Path, ZipFile]):
     """Ensure the R/qtl2 bundle is valid."""
     missing = missing_files(zfile)
     if len(missing) > 0: