aboutsummaryrefslogtreecommitdiff
path: root/r_qtl
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-21 09:20:37 +0300
committer Frederick Muriuki Muriithi 2024-02-21 09:20:37 +0300
commit 075b554cdde11f32e73981222a2cede3bb249151 (patch)
tree accd26a54b67c358a4a0983ac6efec59af0ee359 /r_qtl
parent 6462099372626e11706219a695e8303250359510 (diff)
download gn-uploader-075b554cdde11f32e73981222a2cede3bb249151.tar.gz
Check that samples/cases are consistent
Ensure that **ALL** samples/cases/individuals mentioned in any of the pheno files actually exist in at least one of the geno files.
Diffstat (limited to 'r_qtl')
-rw-r--r-- r_qtl/r_qtl2.py 34
1 files changed, 14 insertions, 20 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 1e28bc0..87491d0 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -368,27 +368,21 @@ def read_geno_file_data(
replace_genotype_codes, genocodes=cdata.get("genotypes", {})))
-def load_samples(
- zipfilepath: Union[str, Path], filetype: str) -> tuple[str, ...]:
- """Load the samples/cases/individuals from file(s) of type 'filetype'."""
- cdata = read_control_file(zipfilepath)
+def load_samples(zipfilepath: Union[str, Path],
+ member: str,
+ transposed: bool) -> tuple[str, ...]:
+ """Load the samples/cases/individuals from file 'member'."""
+ filedata = read_geno_file_data(zipfilepath, member)
samples: set[str] = set()
- for afile in cdata.get(filetype, []):
- filedata = read_geno_file_data(zipfilepath, afile)
- if cdata.get(f"{filetype}_transposed", False):
+ if transposed:
+ samples.update(
+ item for item in next(filedata)[1:] if item is not None)
+ else:
+ try:
+ next(filedata)# Ignore first row.
samples.update(
- item for item in next(filedata)[1:] if item is not None)
- else:
- try:
- next(filedata)# Ignore first row.
- samples.update(
- line[0] for line in filedata if line[0] is not None)
- except StopIteration:# Empty file.
- pass
+ line[0] for line in filedata if line[0] is not None)
+ except StopIteration:# Empty file.
+ pass
return tuple(samples)
-
-
-load_geno_samples = partial(load_samples, filetype="geno")
-load_founder_geno_samples = partial(load_samples, filetype="founder_geno")
-load_pheno_samples = partial(load_samples, filetype="pheno")