1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
import lmdb
import numpy as np
class GenotypeDatabase:
def __init__(self, path):
self.env = lmdb.open(path, readonly=True, create=False)
self.txn = self.env.begin()
# 32 bytes in a SHA256 hash
self.hash_length = 32
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
self.txn.abort()
self.env.close()
def get(self, hash):
return self.txn.get(hash)
def get_metadata(self, hash, metadata):
return self.txn.get(hash + b':' + metadata.encode())
def matrix(self):
hash = self.get(b'current')[0:self.hash_length]
return Matrix(self, hash)
class Matrix():
def __init__(self, db, hash):
self.nrows = int.from_bytes(db.get_metadata(hash, 'nrows'), byteorder='little')
self.ncols = int.from_bytes(db.get_metadata(hash, 'ncols'), byteorder='little')
row_column_pointers = db.get(hash)
self.row_pointers = row_column_pointers[0 : self.nrows*db.hash_length]
self.column_pointers = row_column_pointers[self.nrows*db.hash_length :]
self.db = db
def __vector(self, index, pointers):
start = index * self.db.hash_length
end = start + self.db.hash_length
return np.frombuffer(self.db.get(pointers[start:end]),
dtype=np.uint8)
def row(self, index):
return self.__vector(index, self.row_pointers)
def column(self, index):
return self.__vector(index, self.column_pointers)
|