about summary refs log tree commit diff
path: root/gn3
diff options
context:
space:
mode:
author Arun Isaac 2022-06-03 18:22:28 +0530
committer Arun Isaac 2022-06-03 18:40:57 +0530
commit 89c099319a4f5730cfff7d26219a43eab7490b83 (patch)
tree d09dbe4b00087d8376a9d107c0faa0fb8a1912e9 /gn3
parent c9dc23a573b399b17eb508af6c85a153c1910de4 (diff)
download genenetwork3-89c099319a4f5730cfff7d26219a43eab7490b83.tar.gz
gn3: Add genodb.
genodb is a tiny library to read our new genotype database file format.

* gn3/genodb.py: New file.
Diffstat (limited to 'gn3')
-rw-r--r-- gn3/genodb.py 34
1 files changed, 34 insertions, 0 deletions
diff --git a/gn3/genodb.py b/gn3/genodb.py
new file mode 100644
index 0000000..71e9994
--- /dev/null
+++ b/gn3/genodb.py
@@ -0,0 +1,115 @@
+"""Read our genotype database file format.
+
+A genotype database is an LMDB environment. GenotypeDatabase opens it and
+Matrix reads the rows of the matrix stored in it.
+"""
+
+import lmdb
+import numpy as np
+
+
+class GenotypeDatabase:
+    """Read-only handle on a genotype database.
+
+    One read transaction is begun when the handle is created and is held
+    until the handle is closed. Use the handle as a context manager, or call
+    close() yourself, so the transaction and environment are released.
+    """
+
+    def __init__(self, path):
+        """Open the LMDB environment at path and begin a read transaction."""
+        # Nothing here ever writes. Opening read-only also keeps a mistyped
+        # path from silently creating a new, empty database.
+        self.env = lmdb.open(path, readonly=True, create=False)
+        try:
+            self.txn = self.env.begin()
+        except Exception:
+            # Do not leak the environment when the transaction cannot start.
+            self.env.close()
+            raise
+        # 32 bytes in a SHA256 hash
+        self.hash_length = 32
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def close(self):
+        """Abort the read transaction and close the environment."""
+        try:
+            self.txn.abort()
+        finally:
+            # Close the environment even if aborting the transaction fails.
+            self.env.close()
+
+    def get(self, hash):
+        """Return the value stored under the key hash, or None if absent."""
+        return self.txn.get(hash)
+
+    def get_metadata(self, hash, metadata):
+        """Return the metadata field of the object hash, or None if absent.
+
+        hash is the object's key as bytes and metadata is the field name as
+        a str. The field is stored under the key hash + b':' + name + NUL.
+        """
+        return self.txn.get(hash + b':' + metadata.encode() + b'\0')
+
+    def matrix(self):
+        """Return the Matrix that the 'current' key points to.
+
+        Raises KeyError if the database has no 'current' key.
+        """
+        hash = self.get(b'current\0')
+        if hash is None:
+            raise KeyError("genotype database has no 'current' matrix")
+        return Matrix(self, hash)
+
+
+class Matrix:
+    """A matrix stored in a GenotypeDatabase.
+
+    The value stored under the matrix's hash is a table of row pointers:
+    db.hash_length bytes per row, each the key under which that row's bytes
+    are stored.
+    """
+
+    def __init__(self, db, hash):
+        """Load the dimensions and row pointer table of the matrix hash.
+
+        Raises KeyError if the matrix or its nrows/ncols metadata is missing.
+        """
+        # TODO: Decide on endianness.
+        self.nrows = self._dimension(db, hash, 'nrows')
+        self.ncols = self._dimension(db, hash, 'ncols')
+        row_pointers = db.get(hash)
+        if row_pointers is None:
+            raise KeyError(f'no matrix stored under {hash!r}')
+        self.row_pointers = row_pointers
+        self.db = db
+
+    @staticmethod
+    def _dimension(db, hash, name):
+        """Return the integer metadata field name of the matrix hash."""
+        raw = db.get_metadata(hash, name)
+        if raw is None:
+            raise KeyError(f'matrix {hash!r} has no {name} metadata')
+        return int.from_bytes(raw, byteorder='little')
+
+    def row(self, index):
+        """Return row index as a 1-D uint8 array viewing the stored bytes.
+
+        Raises IndexError if index is negative or past the end of the row
+        pointer table, and KeyError if the row's data is missing.
+        """
+        start = index * self.db.hash_length
+        end = start + self.db.hash_length
+        # A negative index would slice from the wrong end of the table and
+        # an overlarge one would yield a truncated key, so reject both.
+        if index < 0 or end > len(self.row_pointers):
+            raise IndexError(f'row index {index} out of range')
+        data = self.db.get(self.row_pointers[start:end])
+        if data is None:
+            raise KeyError(f'no data stored for row {index}')
+        return np.frombuffer(data, dtype=np.uint8)