From 2f4d2c691f2a40e506d7fc274a0fcd717a028f3d Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 20 Feb 2024 06:12:36 +0300
Subject: Read samples from geno file.

---
Reviewer notes (not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The indentation of the unchanged context lines is a best
  guess; `git apply --ignore-whitespace` tolerates a mismatch there.
- Amended relative to 2f4d2c69, so the `index` blob ids below no longer
  describe the post-image:
  * load_geno_samples: an empty transposed 'geno' file no longer leaks
    StopIteration (the non-transposed layout already tolerated it).
  * load_geno_samples: samples are returned in first-seen order instead of
    arbitrary set order.
  * Docstrings expanded; hunk sizes and diffstat updated to match.

 r_qtl/r_qtl2.py                 | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/r_qtl/test_r_qtl2_geno.py | 14 ++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 8c17362..f03aff5 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -354,3 +354,51 @@ def read_file_data(
             strip_comments(
                 raw_file_data(zipfilepath, memberfilename),
                 cdata["comment.char"])))
+
+
+def read_geno_file_data(
+        zipfilepath: Union[str, Path],
+        memberfilename: str) -> Iterator[tuple[Optional[str], ...]]:
+    """Read a 'geno' file from the R/qtl2 bundle.
+
+    The "genotypes" mapping from the bundle's control file (an empty mapping
+    when the key is absent) is bound to `replace_genotype_codes`, and the
+    result is handed to `read_file_data` as its `processfield` callback.
+    """
+    cdata = read_control_file(zipfilepath)
+    return read_file_data(
+        zipfilepath,
+        memberfilename,
+        processfield=partial(
+            replace_genotype_codes, genocodes=cdata.get("genotypes", {})))
+
+
+def load_geno_samples(zipfilepath: Union[str, Path]) -> tuple[str, ...]:
+    """Load the samples/cases/individuals from the 'geno' file(s).
+
+    Every file listed under "geno" in the control file is read. When
+    "geno_transposed" is set, samples are the fields of the first row after
+    its first field; otherwise they are the first field of every row after
+    the first row. An empty file contributes no samples.
+
+    Each sample is returned once, in the order it is first seen.
+    """
+    cdata = read_control_file(zipfilepath)
+    transposed = cdata.get("geno_transposed", False)
+    # A dict (not a set) drops repeats while keeping first-seen order, so the
+    # returned tuple is the same from one run to the next.
+    samples: dict = {}
+    # NOTE(review): iterating assumes "geno" holds a list of member names; a
+    # bare string would be walked character by character. Confirm that
+    # `read_control_file` normalises this.
+    for genofile in cdata.get("geno", []):
+        gdata = read_geno_file_data(zipfilepath, genofile)
+        # Both layouts consume the first row; the default keeps an empty file
+        # from raising StopIteration in either of them.
+        first_row = next(gdata, ())
+        if transposed:
+            samples.update(dict.fromkeys(first_row[1:]))
+        else:
+            samples.update(dict.fromkeys(row[0] for row in gdata))
+
+    return tuple(samples)
diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py
index c33984e..d3c77e6 100644
--- a/tests/r_qtl/test_r_qtl2_geno.py
+++ b/tests/r_qtl/test_r_qtl2_geno.py
@@ -269,3 +269,17 @@ def test_parse_founder_geno_files(relpath, expected):
         "founder_geno",
         cdata,
         *rqtl2.make_process_data_geno(cdata))) == expected
+
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+    "filepath,expected",
+    (("tests/r_qtl/test_files/test_geno.zip",
+      ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+     ("tests/r_qtl/test_files/test_geno_transposed.zip",
+      ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
+     ("tests/r_qtl/test_files/test_geno_multiple.zip",
+      ("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))))
+def test_load_geno_samples(filepath, expected):
+    """Check `load_geno_samples` returns the expected samples per bundle."""
+    assert sorted(rqtl2.load_geno_samples(filepath)) == sorted(expected)
--
cgit v1.2.3