about summary refs log tree commit diff
path: root/r_qtl
diff options
context:
space:
mode:
Diffstat (limited to 'r_qtl')
-rw-r--r-- r_qtl/r_qtl2.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 8c17362..f03aff5 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -354,3 +354,33 @@ def read_file_data(
             strip_comments(
                 raw_file_data(zipfilepath, memberfilename),
                 cdata["comment.char"])))
+
+
+def read_geno_file_data(
+        zipfilepath: Union[str, Path],
+        memberfilename: str) -> Iterator[tuple[Optional[str], ...]]:
+    """Read the 'geno' member `memberfilename` from the R/qtl2 bundle."""
+    # The bundle's control file supplies the "genotypes" entry; default to an
+    # empty mapping when the entry is absent.
+    genocodes = read_control_file(zipfilepath).get("genotypes", {})
+    # `processfield` is read_file_data's hook; bind the codes into it up front.
+    recode = partial(replace_genotype_codes, genocodes=genocodes)
+    return read_file_data(zipfilepath, memberfilename, processfield=recode)
+
+
+def load_geno_samples(zipfilepath: Union[str, Path]) -> tuple[str, ...]:
+    """Load the unique samples/cases/individuals from the 'geno' file(s)."""
+    cdata = read_control_file(zipfilepath)
+    transposed = cdata.get("geno_transposed", False)
+    samples: dict = {}  # Insertion-ordered, unlike a set: stable output order.
+    for genofile in cdata.get("geno", []):
+        gdata = read_geno_file_data(zipfilepath, genofile)
+        header = next(gdata, None)  # None: empty file, in either layout.
+        if header is None:
+            continue
+        # Transposed: IDs are the header minus its first field; otherwise
+        # they are the first field of every row after the header.
+        ids = header[1:] if transposed else (row[0] for row in gdata)
+        samples.update(dict.fromkeys(ids))
+
+    return tuple(samples)