-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py     | 44
-rw-r--r--  wqflask/wqflask/correlation/rust_correlation.py | 11
2 files changed, 10 insertions, 45 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 7a1690dd..ddcc5ba9 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -64,8 +64,6 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
         ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
         return str.translate(filename, ttable)
 
-
-
     def __generate_file_name__(db_name):
         # todo add expiry time and checker
@@ -77,51 +75,19 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
         if (results):
             return __sanitise_filename__(
                 f"ProbeSetFreezeId_{results[0]}_{results[1]}")
-    """
-
-    def __fetch_id_positions__(all_ids, target_ids):
-        _vals = []
-        _posit = [0]  # alternative for parsing
-
-        for (idx, strain) in enumerate(all_ids, 1):
-            if strain in target_ids:
-                _vals.append(target_ids[strain])
-                _posit.append(idx)
-
-        return (_posit, _vals)
-
-    with open(file_path) as csv_file:
-        csv_reader = csv.reader(csv_file, delimiter=',')
-        _posit, sample_vals = __fetch_id_positions__(
-            next(csv_reader)[1:], sample_dict)
-        return (sample_vals, [[line[i] for i in _posit] for line in csv_reader])
-
-
-    """
-
-    try:
-        with lmdb.open(os.path.join("/tmp","Probesets"),readonly=True,lock=False) as env:
+    # change this to tmpdir
+    with lmdb.open(os.path.join(TMPDIR,"Probesets"),readonly=True,lock=False) as env:
             with env.begin() as txn:
                 filename = __generate_file_name__ (dataset_name)
                 if filename:
-                    data = txn.get(filename.encode())
-
-
-                    col_ids = pickle.loads(data)["data"]
-
-                    data = pickle.loads(data)["strain_names"]
-
-                    return (col_ids,data)
-
-                # parse
-
+                    meta = pickle.loads(txn.get(filename.encode()))
+                    return (meta["strain_names"],meta["data"])
                 return {}
-    except Exception as error:
-        breakpoint()
     return {}
 
+
 def fetch_all_cached_metadata(dataset_name):
     """in a gvein dataset fetch all the traits metadata"""
     file_name = generate_filename(dataset_name, suffix="metadata")
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 0661fa42..ea63d244 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -257,12 +257,11 @@ def __compute_sample_corr__(
     if not bool(sample_data):
        return {}
     if target_dataset.type == "ProbeSet" and start_vars.get("use_cache") == "true":
-        with database_connection() as conn:
-            results = read_lmdb_strain_files("ProbeSets",target_dataset.name)
-            if results:
-                (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
-                return run_correlation(target_data, sample_vals,
-                                       method, ",", corr_type, n_top)
+        results = read_lmdb_strain_files("ProbeSets",target_dataset.name)
+        if results:
+            (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
+            return run_correlation(target_data, sample_vals,
+                                   method, ",", corr_type, n_top)
     target_dataset.get_trait_data(list(sample_data.keys()))
 
 def __merge_key_and_values__(rows, current):
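Note on the cache record format: this commit only touches the read side of the lmdb cache. The new unpacking code, pickle.loads(...) followed by (meta["strain_names"], meta["data"]), implies that each lmdb value is a single pickled dict holding both the strain names and the trait rows. Below is a minimal sketch of a matching writer under that assumption; the write_lmdb_strain_file name, the key string, the map_size value, and the TMPDIR fallback are illustrative and not part of this commit (real keys come from __generate_file_name__).

import os
import pickle

import lmdb

TMPDIR = os.environ.get("TMPDIR", "/tmp")  # assumed to mirror the app config


def write_lmdb_strain_file(dataset_name, strain_names, data):
    # Hypothetical writer: store one pickled record per dataset, using the
    # same two keys ("strain_names", "data") the new read path unpacks.
    key = f"ProbeSetFreezeId_{dataset_name}"  # placeholder key format
    with lmdb.open(os.path.join(TMPDIR, "Probesets"), map_size=2 ** 30) as env:
        with env.begin(write=True) as txn:
            txn.put(key.encode(),
                    pickle.dumps({"strain_names": strain_names, "data": data}))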
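parse_lmdb_dataset itself is not shown in this diff, but its call site, parse_lmdb_dataset(results[0], sample_data, results[1]), together with the commented-out CSV helper deleted above, suggests what it does: locate the submitted samples inside the cached strain list, then slice every cached row down to those columns. A sketch under that assumption (the real implementation lives elsewhere in pre_computes.py):

def parse_lmdb_dataset(strain_names, sample_dict, data):
    # Mirrors the deleted __fetch_id_positions__ logic: column 0 of each
    # cached row is assumed to hold the trait identifier, so it is kept;
    # columns 1..n are assumed to line up with strain_names.
    positions, sample_vals = [0], []
    for idx, strain in enumerate(strain_names, 1):
        if strain in sample_dict:
            sample_vals.append(sample_dict[strain])
            positions.append(idx)
    return (sample_vals, [[row[i] for i in positions] for row in data])

The rust_correlation.py hunk also drops the database_connection() wrapper around this branch: once the data comes from the lmdb cache, the cached path no longer needs an open SQL connection, so holding one was pure overhead.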