-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py | 92
1 file changed, 89 insertions, 3 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 720eab09..7a1690dd 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -7,11 +7,13 @@ import datetime
 import lmdb
 import pickle
 from pathlib import Path
-
+from gn3.db_utils import database_connection
 from base.data_set import query_table_timestamp
 from base.webqtlConfig import TEXTDIR
 from base.webqtlConfig import TMPDIR
 
+from utility.tools import SQL_URI
+
 from json.decoder import JSONDecodeError
 
 
 def cache_trait_metadata(dataset_name, data):
@@ -30,8 +32,7 @@ def cache_trait_metadata(dataset_name, data):
 
 def read_trait_metadata(dataset_name,dataset_type):
     try:
-        with lmdb.open(os.path.join("/tmp/", f"metadata_{dataset_type}"),
-                       readonly=True, lock=False) as env:
+        with lmdb.open(os.path.join("/tmp/", f"metadata_{dataset_type}"), readonly=True, lock=False) as env:
             with env.begin() as txn:
                 metadata = txn.get(dataset_name.encode())
                 return (pickle.loads(metadata)["data"] if metadata else {})
@@ -39,6 +40,88 @@ def read_trait_metadata(dataset_name,dataset_type):
         return {}
 
 
+def parse_lmdb_dataset(strain_names, target_strains, data):
+    _vals = []
+    _posit = [0]
+
+    def __fetch_id_positions__(all_ids, target_ids):
+        _vals = []
+        _posit = [0]  # alternative for parsing
+
+        for (idx, strain) in enumerate(strain_names, 1):
+            if strain in target_strains:
+                _vals.append(target_strains[strain])
+                _posit.append(idx)
+
+        return (_posit, _vals)
+
+    _posit, sample_vals = __fetch_id_positions__(strain_names, target_strains)
+    return (sample_vals, [[line[i] for i in _posit] for line in data.values()])
+
+
+def read_lmdb_strain_files(dataset_type, dataset_name, sql_uri=SQL_URI):
+    # target file path for example probeset and name used to generate the name
+
+    def __sanitise_filename__(filename):
+        ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
+        return str.translate(filename, ttable)
+
+    def __generate_file_name__(db_name):
+        # todo add expiry time and checker
+
+        with database_connection() as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(
+                    'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s',
+                    (db_name,))
+                results = cursor.fetchone()
+                if (results):
+                    return __sanitise_filename__(
+                        f"ProbeSetFreezeId_{results[0]}_{results[1]}")
+
+    """
+    def __fetch_id_positions__(all_ids, target_ids):
+        _vals = []
+        _posit = [0]  # alternative for parsing
+
+        for (idx, strain) in enumerate(all_ids, 1):
+            if strain in target_ids:
+                _vals.append(target_ids[strain])
+                _posit.append(idx)
+
+        return (_posit, _vals)
+
+    with open(file_path) as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=',')
+        _posit, sample_vals = __fetch_id_positions__(
+            next(csv_reader)[1:], sample_dict)
+        return (sample_vals, [[line[i] for i in _posit] for line in csv_reader])
+    """
+
+    try:
+        with lmdb.open(os.path.join("/tmp", "Probesets"), readonly=True, lock=False) as env:
+            with env.begin() as txn:
+                filename = __generate_file_name__(dataset_name)
+                if filename:
+                    data = txn.get(filename.encode())
+
+                    col_ids = pickle.loads(data)["data"]
+                    data = pickle.loads(data)["strain_names"]
+
+                    return (col_ids, data)
+
+                # parse
+                return {}
+
+    except Exception as error:
+        breakpoint()
+        return {}
+
+
 def fetch_all_cached_metadata(dataset_name):
     """in a gvein dataset fetch all the traits metadata"""
     file_name = generate_filename(dataset_name, suffix="metadata")
@@ -105,6 +188,9 @@ def fetch_text_file(dataset_name, conn, text_dir=TMPDIR):
         pass
 
 
+
+
+
 def read_text_file(sample_dict, file_path):
 
     def __fetch_id_positions__(all_ids, target_ids):
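
The alignment step at the heart of parse_lmdb_dataset (and of the commented-out CSV variant kept inside the string literal) works like this: scan the cached list of strain names once, record the 1-based column position of every strain the caller supplied, and collect the caller's values in matching order; position 0 is kept so each row's leading trait identifier survives the later slice. A minimal standalone sketch of that technique, with a hypothetical function name and made-up strains and values:

    def align_target_strains(header, target_values):
        # Return (positions, values) for strains present both in the
        # cached header and in the caller's strain -> value mapping.
        positions = [0]  # keep column 0 so each row's label survives slicing
        values = []
        for idx, strain in enumerate(header, 1):
            if strain in target_values:
                values.append(target_values[strain])
                positions.append(idx)
        return positions, values

    # Hypothetical cached rows: trait label first, then one value per strain.
    header = ["BXD1", "BXD2", "BXD5"]
    rows = {"trait_a": ["trait_a", 1.2, 3.4, 5.6]}
    positions, values = align_target_strains(header, {"BXD1": 9.0, "BXD5": 7.0})
    sliced = [[row[i] for i in positions] for row in rows.values()]
    # positions == [0, 1, 3]; values == [9.0, 7.0]; sliced == [["trait_a", 1.2, 5.6]]

Note that in the committed parse_lmdb_dataset the inner __fetch_id_positions__ closes over strain_names and target_strains rather than using its own all_ids and target_ids parameters; the sketch above uses its arguments directly.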
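This commit adds only the read side of the /tmp/Probesets LMDB environment; the writer that populates it is not part of the diff. The read path implies a cache contract: the key is the sanitised f"ProbeSetFreezeId_{Id}_{FullName}" string built from the ProbeSetFreeze table, and the value is a pickled dict with "data" and "strain_names" entries, which read_lmdb_strain_files returns as (col_ids, data). (It also deserialises the payload twice where one pickle.loads would do, and the breakpoint() in its except clause looks like a debugging leftover.) Below is a writer sketch consistent with that contract only; the function name, payload semantics, and map_size are assumptions for illustration, not code from this repository:

    import os
    import pickle

    import lmdb

    def write_probeset_cache(dataset_key, trait_rows, strain_names):
        # Hypothetical writer matching read_lmdb_strain_files' expectations:
        #   dataset_key  -- already-sanitised "ProbeSetFreezeId_{Id}_{FullName}"
        #   trait_rows   -- stored under "data" (returned first by the reader)
        #   strain_names -- stored under "strain_names" (returned second)
        payload = pickle.dumps({"data": trait_rows,
                                "strain_names": strain_names})
        # map_size caps the size of the whole environment; 1 GiB is an
        # arbitrary illustrative figure, not a value taken from this commit.
        with lmdb.open(os.path.join("/tmp", "Probesets"), map_size=1024 ** 3) as env:
            with env.begin(write=True) as txn:
                txn.put(dataset_key.encode(), payload)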