Skip malformed genotype rows with mismatched data columns.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
author: Munyoki Kilyungi 2025-07-17 09:42:03 +0300
committer: Munyoki Kilyungi 2025-07-17 09:42:03 +0300
commit: aacf63b16aa5d77232d38e45b04c717203d1a5af (patch)
tree: 2f6fb775c93ed09c8f41bd6b0b373f1d8aeaad24
parent: 15e338f376e9312b20ef660dc75a218739a95bee (diff)
download: genenetwork3-aacf63b16aa5d77232d38e45b04c717203d1a5af.tar.gz
1 files changed, 7 insertions, 0 deletions
diff --git a/scripts/lmdb_matrix.py b/scripts/lmdb_matrix.py
index 02981f0..a66f2d1 100644
--- a/scripts/lmdb_matrix.py
+++ b/scripts/lmdb_matrix.py
@@ -304,6 +304,13 @@ def read_genotype_file(genotype_file: str) -> GenotypeMatrix:
                 continue
             meta, data = line[:len(metadata_columns)
                               ], line[len(metadata_columns):]
+            # KLUDGE: It's not clear whether chromosome rows that
+            # start with a '#' should be treated as comments.  For some
+            # (e.g. B6D2F2_mm8) there's a mismatch between the number of
+            # data values and ncols.  For now, skip those rows.
+            if len(data) != ncols:
+                i += 1
+                continue
             for j, el in enumerate(data):
                 match el:
                     case _ if el.isdigit():
author	Munyoki Kilyungi	2025-07-17 09:42:03 +0300
committer	Munyoki Kilyungi	2025-07-17 09:42:03 +0300
commit	aacf63b16aa5d77232d38e45b04c717203d1a5af (patch)
tree	2f6fb775c93ed09c8f41bd6b0b373f1d8aeaad24
parent	15e338f376e9312b20ef660dc75a218739a95bee (diff)
download	genenetwork3-aacf63b16aa5d77232d38e45b04c717203d1a5af.tar.gz