diff options
author | Frederick Muriuki Muriithi | 2024-02-21 09:20:37 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-02-21 09:20:37 +0300 |
commit | 075b554cdde11f32e73981222a2cede3bb249151 (patch) | |
tree | accd26a54b67c358a4a0983ac6efec59af0ee359 /r_qtl | |
parent | 6462099372626e11706219a695e8303250359510 (diff) | |
download | gn-uploader-075b554cdde11f32e73981222a2cede3bb249151.tar.gz |
Check that samples/cases are consistent
Ensure that **ALL** samples/cases/individuals mentioned in any of the
pheno files actually exist in at least one of the geno files.
Diffstat (limited to 'r_qtl')
-rw-r--r-- | r_qtl/r_qtl2.py | 34 |
1 files changed, 14 insertions, 20 deletions
```diff
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 1e28bc0..87491d0 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -368,27 +368,21 @@ def read_geno_file_data(
         replace_genotype_codes, genocodes=cdata.get("genotypes", {})))
 
 
-def load_samples(
-        zipfilepath: Union[str, Path], filetype: str) -> tuple[str, ...]:
-    """Load the samples/cases/individuals from file(s) of type 'filetype'."""
-    cdata = read_control_file(zipfilepath)
+def load_samples(zipfilepath: Union[str, Path],
+                 member: str,
+                 transposed: bool) -> tuple[str, ...]:
+    """Load the samples/cases/individuals from file 'member'."""
+    filedata = read_geno_file_data(zipfilepath, member)
     samples: set[str] = set()
-    for afile in cdata.get(filetype, []):
-        filedata = read_geno_file_data(zipfilepath, afile)
-        if cdata.get(f"{filetype}_transposed", False):
+    if transposed:
+        samples.update(
+            item for item in next(filedata)[1:] if item is not None)
+    else:
+        try:
+            next(filedata)# Ignore first row.
             samples.update(
-                item for item in next(filedata)[1:] if item is not None)
-        else:
-            try:
-                next(filedata)# Ignore first row.
-                samples.update(
-                    line[0] for line in filedata if line[0] is not None)
-            except StopIteration:# Empty file.
-                pass
+                line[0] for line in filedata if line[0] is not None)
+        except StopIteration:# Empty file.
+            pass
     return tuple(samples)
-
-
-load_geno_samples = partial(load_samples, filetype="geno")
-load_founder_geno_samples = partial(load_samples, filetype="founder_geno")
-load_pheno_samples = partial(load_samples, filetype="pheno")
```