diff options
author | Munyoki Kilyungi | 2023-02-06 14:32:27 +0300 |
---|---|---|
committer | BonfaceKilz | 2023-02-06 18:14:19 +0300 |
commit | 8bfb1f9e9f74f41eb251eb025838244cc5c256eb (patch) | |
tree | 9730e4f97a9d975ae43a6db8a62c8e866dc6dfd3 /gn3/db/matrix.py | |
parent | 30da2f48eb35360bb339d54da2ab83d96a1cf85b (diff) | |
download | genenetwork3-8bfb1f9e9f74f41eb251eb025838244cc5c256eb.tar.gz |
Add method to fetch the current matrix
* gn3/db/matrix.py: New file.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3/db/matrix.py')
-rw-r--r-- | gn3/db/matrix.py | 44 |
1 files changed, 44 insertions, 0 deletions
"""Methods for fetching data from the matrix stored in LMDB"""
from typing import Optional
from dataclasses import dataclass

import struct
import json
import lmdb

# Width, in bytes, of each row key packed inside the ":row-pointers" blob.
BLOB_HASH_DIGEST = 32


@dataclass
class Matrix:
    """Store sample data and any other relevant metadata"""

    # One JSON-decoded value per stored row, in row-pointer order.
    data: list
    # JSON-decoded contents of the "<matrix-hash>:metadata" record.
    metadata: dict


def get_current_matrix(db_path: str) -> Optional[Matrix]:
    """Get the most recent matrix from the LMDB store at DB_PATH.

    The lookup follows a chain of records: ``b"current"`` names the
    current hash, ``<current>:matrix`` names the matrix hash, and
    ``<matrix>:row-pointers`` holds the concatenated fixed-width keys
    (BLOB_HASH_DIGEST bytes each) of the JSON-encoded rows.  The number
    of rows is read from ``<matrix>:nrows`` as a little-endian unsigned
    64-bit integer.

    Returns a Matrix, or None when any link of that chain (current
    hash, matrix hash or row pointers) is absent or empty.

    The ``nrows``, row and metadata records are expected to exist once
    row pointers are present; a missing one surfaces as the TypeError /
    AttributeError raised when decoding ``None``.
    """
    # Use the environment as a context manager so that it is closed on
    # every exit path; all values are fully decoded before returning.
    with lmdb.open(db_path) as env, env.begin(write=False) as txn:
        current_hash = txn.get(b"current")
        if not current_hash:
            return None
        matrix_hash = txn.get(current_hash + b":matrix")
        if not matrix_hash:
            return None
        row_pointers = txn.get(matrix_hash + b":row-pointers")
        if not row_pointers:
            return None
        (nrows,) = struct.unpack("<Q", txn.get(matrix_hash + b":nrows"))
        rows = [
            json.loads(
                txn.get(row_pointers[start: start + BLOB_HASH_DIGEST])
                .decode()
            )
            for start in range(
                0, nrows * BLOB_HASH_DIGEST, BLOB_HASH_DIGEST
            )
        ]
        # The metadata record may be NUL-padded; strip before decoding.
        metadata = json.loads(
            txn.get(matrix_hash + b":metadata").rstrip(b"\x00").decode()
        )
        return Matrix(data=rows, metadata=metadata)