about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2025-07-17 09:42:03 +0300
committerMunyoki Kilyungi2025-07-17 09:42:03 +0300
commitaacf63b16aa5d77232d38e45b04c717203d1a5af (patch)
tree2f6fb775c93ed09c8f41bd6b0b373f1d8aeaad24
parent15e338f376e9312b20ef660dc75a218739a95bee (diff)
downloadgenenetwork3-aacf63b16aa5d77232d38e45b04c717203d1a5af.tar.gz
Skip malformed genotype rows with mismatched data columns.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r--scripts/lmdb_matrix.py7
1 files changed, 7 insertions, 0 deletions
diff --git a/scripts/lmdb_matrix.py b/scripts/lmdb_matrix.py
index 02981f0..a66f2d1 100644
--- a/scripts/lmdb_matrix.py
+++ b/scripts/lmdb_matrix.py
@@ -304,6 +304,13 @@ def read_genotype_file(genotype_file: str) -> GenotypeMatrix:
                 continue
             meta, data = line[:len(metadata_columns)
                               ], line[len(metadata_columns):]
+            # KLUDGE: It's not clear whether chromosome rows that
+            # start with a '#' should be treated as comments.  For
+            # some (e.g. B6D2F2_mm8), the number of data values does
+            # not match ncols.  For now, skip any such row.
+            if len(data) != ncols:
+                i += 1
+                continue
             for j, el in enumerate(data):
                 match el:
                     case _ if el.isdigit():