From aacf63b16aa5d77232d38e45b04c717203d1a5af Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 17 Jul 2025 09:42:03 +0300 Subject: Skip malformed genotype rows with mismatched data columns. Signed-off-by: Munyoki Kilyungi --- scripts/lmdb_matrix.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/lmdb_matrix.py b/scripts/lmdb_matrix.py index 02981f0..a66f2d1 100644 --- a/scripts/lmdb_matrix.py +++ b/scripts/lmdb_matrix.py @@ -304,6 +304,13 @@ def read_genotype_file(genotype_file: str) -> GenotypeMatrix: continue meta, data = line[:len(metadata_columns) ], line[len(metadata_columns):] + # KLUDGE: It's not clear whether chromosome rows that + # start with a '#' should be treated as comments or not. + # For some files (e.g. B6D2F2_mm8) the number of data + # values does not match ncols. For now, skip such rows. + if len(data) != ncols: + i += 1 + continue for j, el in enumerate(data): match el: case _ if el.isdigit(): -- cgit 1.4.1