about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-20 06:12:36 +0300
committer Frederick Muriuki Muriithi 2024-02-20 06:12:36 +0300
commit 2f4d2c691f2a40e506d7fc274a0fcd717a028f3d (patch)
tree 723a524a2416d432efc3070ac1ba4b0f4301d7d3
parent b1483d974d30d162e12557f55e856ec7d79bad2e (diff)
download gn-uploader-2f4d2c691f2a40e506d7fc274a0fcd717a028f3d.tar.gz
Read samples from geno file.
-rw-r--r-- r_qtl/r_qtl2.py 30
-rw-r--r-- tests/r_qtl/test_r_qtl2_geno.py 13
2 files changed, 43 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 8c17362..f03aff5 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -354,3 +354,33 @@ def read_file_data(
strip_comments(
raw_file_data(zipfilepath, memberfilename),
cdata["comment.char"])))
+
+
+def read_geno_file_data(
+        zipfilepath: Union[str, Path],
+        memberfilename: str) -> Iterator[tuple[Optional[str], ...]]:
+    """
+    Read one 'geno' member file from the R/qtl2 bundle.
+
+    The bundle's control file is read first. Its "genotypes" mapping (an
+    empty mapping when the key is absent) is bound to
+    `replace_genotype_codes`, and the resulting callable is handed to
+    `read_file_data` as the per-field processor for `memberfilename`.
+
+    Returns whatever `read_file_data` returns for that member file.
+    """
+    genotype_codes = read_control_file(zipfilepath).get("genotypes", {})
+    decode_field = partial(replace_genotype_codes, genocodes=genotype_codes)
+    return read_file_data(
+        zipfilepath, memberfilename, processfield=decode_field)
+
+
+def load_geno_samples(zipfilepath: Union[str, Path]) -> tuple[str, ...]:
+    """
+    Load the samples/cases/individuals from the 'geno' file(s).
+
+    Every file listed under the control file's "geno" key is read with
+    `read_geno_file_data`. When the control file sets "geno_transposed",
+    the samples are the fields of the first row, minus its first field.
+    Otherwise the first row is skipped and the samples are the first field
+    of each remaining row.
+
+    Returns a tuple of the unique samples, in the order in which they are
+    first encountered. Empty files contribute no samples in either layout.
+    """
+    cdata = read_control_file(zipfilepath)
+    genofiles = cdata.get("geno", [])
+    if isinstance(genofiles, str):
+        # A lone file name: wrap it so that we iterate over files rather
+        # than over the characters of the name.
+        genofiles = [genofiles]
+    transposed = cdata.get("geno_transposed", False)
+
+    # A dict serves as an ordered set: duplicates collapse, and the order
+    # of the returned tuple is deterministic, unlike that of a `set`.
+    samples = {}
+    for genofile in genofiles:
+        gdata = read_geno_file_data(zipfilepath, genofile)
+        # Giving `next` a default keeps an empty file from raising
+        # StopIteration, whichever layout the file is in.
+        first_row = next(gdata, None)
+        if first_row is None:
+            continue
+        if transposed:
+            samples.update(dict.fromkeys(first_row[1:]))
+        else:
+            samples.update(dict.fromkeys(line[0] for line in gdata))
+
+    return tuple(samples)
diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py
index c33984e..d3c77e6 100644
--- a/tests/r_qtl/test_r_qtl2_geno.py
+++ b/tests/r_qtl/test_r_qtl2_geno.py
@@ -269,3 +269,16 @@ def test_parse_founder_geno_files(relpath, expected):
"founder_geno",
cdata,
*rqtl2.make_process_data_geno(cdata))) == expected
+
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+    "filepath,expected",
+    [
+        ("tests/r_qtl/test_files/test_geno.zip",
+         ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+        ("tests/r_qtl/test_files/test_geno_transposed.zip",
+         ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+        ("tests/r_qtl/test_files/test_geno_multiple.zip",
+         ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+    ])
+def test_load_geno_samples(filepath, expected):
+    """
+    Check that `load_geno_samples` finds the expected samples in a bundle.
+
+    Both sides are sorted before comparison, so only the membership of the
+    loaded samples is checked, not their order.
+    """
+    loaded_samples = rqtl2.load_geno_samples(filepath)
+    assert sorted(loaded_samples) == sorted(expected)