diff options
author | Arun Isaac | 2022-06-03 18:22:28 +0530 |
---|---|---|
committer | Arun Isaac | 2022-06-03 18:40:57 +0530 |
commit | 89c099319a4f5730cfff7d26219a43eab7490b83 (patch) | |
tree | d09dbe4b00087d8376a9d107c0faa0fb8a1912e9 /gn3/genodb.py | |
parent | c9dc23a573b399b17eb508af6c85a153c1910de4 (diff) | |
download | genenetwork3-89c099319a4f5730cfff7d26219a43eab7490b83.tar.gz |
gn3: Add genodb.
genodb is a tiny library to read our new genotype database file format.
* gn3/genodb.py: New file.
Diffstat (limited to 'gn3/genodb.py')
-rw-r--r-- | gn3/genodb.py | 34 |
1 files changed, 34 insertions, 0 deletions
"""Reader for the genotype database file format.

The database is an LMDB environment. The key ``b'current\\0'`` holds the
hash of the current matrix; the value stored under that hash is a
concatenation of fixed-length row hashes, and each row hash in turn keys
the bytes of one row. Matrix metadata lives under
``<hash> + b':' + <name> + b'\\0'``.
"""
import numpy as np


class GenotypeDatabase:
    """Read-only handle on a genotype database.

    Intended to be used as a context manager so that the transaction and
    the environment are released deterministically::

        with GenotypeDatabase(path) as db:
            first_row = db.matrix().row(0)
    """

    def __init__(self, path):
        """Open the LMDB environment at *path* and begin a transaction.

        Raises:
            lmdb.Error: if the environment cannot be opened, e.g. because
                *path* does not exist.
        """
        # Imported here rather than at module level so that the parts of
        # this module that do not touch LMDB directly (e.g. Matrix) stay
        # importable where the LMDB bindings are not installed.
        import lmdb

        # 32 bytes in a SHA256 hash
        self.hash_length = 32
        # This library only reads, so open the environment read-only and
        # refuse to create an empty one when the path is wrong.
        self.env = lmdb.open(path, readonly=True, create=False)
        try:
            self.txn = self.env.begin()
        except Exception:
            # Do not leak the environment if no transaction can be started.
            self.env.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Abort the transaction and close the environment."""
        try:
            self.txn.abort()
        finally:
            # Close the environment even if aborting the transaction fails.
            self.env.close()

    def get(self, hash):
        """Return the value stored under the key *hash*, or None if absent."""
        return self.txn.get(hash)

    def get_metadata(self, hash, metadata):
        """Return the raw bytes of metadata field *metadata* of *hash*.

        Args:
            hash: key (bytes) of the object the metadata belongs to.
            metadata: name of the metadata field, as a str.

        Returns:
            The stored bytes, or None if there is no such field.
        """
        return self.txn.get(hash + b':' + metadata.encode() + b'\0')

    def matrix(self):
        """Return the current Matrix of the database.

        Raises:
            KeyError: if the database has no ``current`` entry.
        """
        current = self.get(b'current\0')
        if current is None:
            raise KeyError("genotype database has no 'current' matrix")
        return Matrix(self, current)


class Matrix:
    """A matrix stored in a GenotypeDatabase, read one row at a time.

    Attributes set by the constructor:
        nrows, ncols: integers decoded from the ``nrows`` and ``ncols``
            metadata fields of the matrix.
        row_pointers: value stored under the matrix hash; a concatenation
            of ``db.hash_length``-byte row hashes (None if absent).
        db: the database the matrix was read from.
    """

    def __init__(self, db, hash):
        """Load the dimensions and the row pointer table of matrix *hash*.

        Args:
            db: object providing ``get``, ``get_metadata`` and
                ``hash_length`` (normally a GenotypeDatabase).
            hash: key (bytes) of the matrix in *db*.

        Raises:
            KeyError: if the ``nrows`` or ``ncols`` metadata is missing.
        """
        self.db = db
        self.nrows = self._read_dimension(db, hash, 'nrows')
        self.ncols = self._read_dimension(db, hash, 'ncols')
        self.row_pointers = db.get(hash)

    @staticmethod
    def _read_dimension(db, matrix_hash, name):
        """Decode the integer metadata field *name* of *matrix_hash*."""
        raw = db.get_metadata(matrix_hash, name)
        if raw is None:
            raise KeyError('matrix {} has no {!r} metadata'.format(
                bytes(matrix_hash).hex(), name))
        # TODO: Decide on endianness.
        return int.from_bytes(raw, byteorder='little')

    def row(self, index):
        """Return row *index* as a one-dimensional ``numpy.uint8`` array.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: if *index* is outside the row pointer table.
            KeyError: if the row pointer table, or the row it points to,
                is missing from the database.
        """
        if self.row_pointers is None:
            raise KeyError('matrix has no row pointer table')
        width = self.db.hash_length
        available = len(self.row_pointers) // width
        position = index
        if position < 0:
            # Normalise explicitly: a plain slice [start:start + width]
            # ends at 0 for index == -1 and would therefore be empty.
            position += available
        if not 0 <= position < available:
            raise IndexError('row index {} out of range for {} rows'.format(
                index, available))
        start = position * width
        pointer = self.row_pointers[start:start + width]
        data = self.db.get(pointer)
        if data is None:
            raise KeyError('row {} ({}) is missing from the database'.format(
                index, bytes(pointer).hex()))
        return np.frombuffer(data, dtype=np.uint8)