aboutsummaryrefslogtreecommitdiff
path: root/r_qtl
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-02-20 06:12:36 +0300
committerFrederick Muriuki Muriithi2024-02-20 06:12:36 +0300
commit2f4d2c691f2a40e506d7fc274a0fcd717a028f3d (patch)
tree723a524a2416d432efc3070ac1ba4b0f4301d7d3 /r_qtl
parentb1483d974d30d162e12557f55e856ec7d79bad2e (diff)
downloadgn-uploader-2f4d2c691f2a40e506d7fc274a0fcd717a028f3d.tar.gz
Read samples from geno file.
Diffstat (limited to 'r_qtl')
-rw-r--r--r_qtl/r_qtl2.py30
1 files changed, 30 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 8c17362..f03aff5 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -354,3 +354,33 @@ def read_file_data(
strip_comments(
raw_file_data(zipfilepath, memberfilename),
cdata["comment.char"])))
+
+
def read_geno_file_data(
        zipfilepath: Union[str, Path],
        memberfilename: str) -> Iterator[tuple[Optional[str], ...]]:
    """Read a single 'geno' member file out of an R/qtl2 bundle.

    The bundle's control data is read first; its "genotypes" entry (an empty
    mapping when the key is absent) is bound as the `genocodes` argument of
    `replace_genotype_codes`, and the resulting callable is handed to
    `read_file_data` as its `processfield` callback.

    Returns whatever `read_file_data` produces for the given member file.
    """
    genocodes = read_control_file(zipfilepath).get("genotypes", {})
    translate_field = partial(replace_genotype_codes, genocodes=genocodes)
    return read_file_data(
        zipfilepath, memberfilename, processfield=translate_field)
+
+
def load_geno_samples(zipfilepath: Union[str, Path]) -> tuple[str, ...]:
    """Load the samples/cases/individuals from the 'geno' file(s).

    Every file listed under the "geno" key of the bundle's control data is
    read with `read_geno_file_data`. Where the sample identifiers are taken
    from depends on the control data's "geno_transposed" flag:

    * flag truthy: the identifiers are the fields of the first row, minus
      its first field;
    * flag falsy/absent: the first row is skipped and the identifiers are
      the first field of every remaining row.

    An empty geno file contributes no samples in either layout.

    Returns a tuple of the distinct identifiers, in first-seen order across
    the geno files.
    """
    cdata = read_control_file(zipfilepath)
    transposed = cdata.get("geno_transposed", False)
    # Dict keys de-duplicate like a set but keep insertion order, so the
    # returned tuple is stable from one run to the next.
    samples: dict = {}
    for genofile in cdata.get("geno", []):
        gdata = read_geno_file_data(zipfilepath, genofile)
        # Use a default so an empty file is skipped in BOTH layouts instead
        # of leaking StopIteration out of the transposed branch.
        header = next(gdata, None)
        if header is None:  # Empty file.
            continue
        if transposed:
            samples.update(dict.fromkeys(header[1:]))
        else:
            # The header row was consumed above; only data rows remain.
            samples.update(dict.fromkeys(line[0] for line in gdata))

    return tuple(samples)