diff options
| author | Munyoki Kilyungi | 2025-07-17 09:42:03 +0300 |
|---|---|---|
| committer | Munyoki Kilyungi | 2025-07-17 09:42:03 +0300 |
| commit | aacf63b16aa5d77232d38e45b04c717203d1a5af (patch) | |
| tree | 2f6fb775c93ed09c8f41bd6b0b373f1d8aeaad24 | |
| parent | 15e338f376e9312b20ef660dc75a218739a95bee (diff) | |
| download | genenetwork3-aacf63b16aa5d77232d38e45b04c717203d1a5af.tar.gz | |
Skip malformed genotype rows with mismatched data columns.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
| -rw-r--r-- | scripts/lmdb_matrix.py | 7 |
1 file changed, 7 insertions, 0 deletions
```diff
diff --git a/scripts/lmdb_matrix.py b/scripts/lmdb_matrix.py
index 02981f0..a66f2d1 100644
--- a/scripts/lmdb_matrix.py
+++ b/scripts/lmdb_matrix.py
@@ -304,6 +304,13 @@ def read_genotype_file(genotype_file: str) -> GenotypeMatrix:
                 continue
             meta, data = line[:len(metadata_columns)
                               ], line[len(metadata_columns):]
+            # KLUDGE: It's not clear whether chromosome rows that
+            # start with a '#' should be a comment or not. For some
+            # there's a mismatch between (E.g. B6D2F2_mm8) the size of
+            # the data values and ncols. For now, skip them.
+            if len(data) != ncols:
+                i += 1
+                continue
             for j, el in enumerate(data):
                 match el:
                     case _ if el.isdigit():
```
