about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-20 06:12:36 +0300
committer Frederick Muriuki Muriithi 2024-02-20 06:12:36 +0300
commit 2f4d2c691f2a40e506d7fc274a0fcd717a028f3d (patch)
tree 723a524a2416d432efc3070ac1ba4b0f4301d7d3
parent b1483d974d30d162e12557f55e856ec7d79bad2e (diff)
download gn-uploader-2f4d2c691f2a40e506d7fc274a0fcd717a028f3d.tar.gz
Read samples from geno file.
-rw-r--r-- r_qtl/r_qtl2.py 30
-rw-r--r-- tests/r_qtl/test_r_qtl2_geno.py 13
2 files changed, 43 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 8c17362..f03aff5 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -354,3 +354,33 @@ def read_file_data(
             strip_comments(
                 raw_file_data(zipfilepath, memberfilename),
                 cdata["comment.char"])))
+
+
+def read_geno_file_data(
+        zipfilepath: Union[str, Path],
+        memberfilename: str) -> Iterator[tuple[Optional[str], ...]]:
+    """Read a 'geno' file from the R/qtl2 bundle."""
+    cdata = read_control_file(zipfilepath)
+    return read_file_data(
+        zipfilepath,
+        memberfilename,
+        processfield=partial(
+            replace_genotype_codes, genocodes=cdata.get("genotypes", {})))
+
+
+def load_geno_samples(zipfilepath: Union[str, Path]) -> tuple[str, ...]:
+    """Load the samples/cases/individuals from the 'geno' file(s)."""
+    cdata = read_control_file(zipfilepath)
+    samples = set()
+    for genofile in cdata.get("geno", []):
+        gdata = read_geno_file_data(zipfilepath, genofile)
+        if cdata.get("geno_transposed", False):
+            samples.update(next(gdata)[1:])
+        else:
+            try:
+                next(gdata)# Ignore first row.
+                samples.update(line[0] for line in gdata)
+            except StopIteration:# Empty file.
+                pass
+
+    return tuple(samples)
diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py
index c33984e..d3c77e6 100644
--- a/tests/r_qtl/test_r_qtl2_geno.py
+++ b/tests/r_qtl/test_r_qtl2_geno.py
@@ -269,3 +269,16 @@ def test_parse_founder_geno_files(relpath, expected):
             "founder_geno",
             cdata,
             *rqtl2.make_process_data_geno(cdata))) == expected
+
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+    "filepath,expected",
+    (("tests/r_qtl/test_files/test_geno.zip",
+      ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+     ("tests/r_qtl/test_files/test_geno_transposed.zip",
+      ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+     ("tests/r_qtl/test_files/test_geno_multiple.zip",
+      ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))))
+def test_load_geno_samples(filepath, expected):
+    assert sorted(rqtl2.load_geno_samples(filepath)) == sorted(expected)