author     Alexander_Kabui  2023-05-03 00:12:00 +0300
committer  Alexander_Kabui  2023-05-03 00:12:00 +0300
commit     c096d1ae63e445379c7732c494e2eebb1c1728dd (patch)
tree       d98a974202ffe8ad9ede36eea77a63f40bc67d41
parent     37a4910662ff412e4853001ee2bbe3037f4848a9 (diff)
download   genenetwork2-c096d1ae63e445379c7732c494e2eebb1c1728dd.tar.gz
code refactoring: simplify the lmdb cache read in pre_computes.py, drop dead commented-out CSV-parsing code and a stray breakpoint(), and remove an unneeded database_connection() wrapper in rust_correlation.py
-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py      | 44
-rw-r--r--  wqflask/wqflask/correlation/rust_correlation.py  | 11
2 files changed, 10 insertions(+), 45 deletions(-)
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 7a1690dd..ddcc5ba9 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -64,8 +64,6 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
return str.translate(filename, ttable)
-
-
def __generate_file_name__(db_name):
# todo add expiry time and checker
@@ -77,51 +75,19 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
if (results):
return __sanitise_filename__(
f"ProbeSetFreezeId_{results[0]}_{results[1]}")
- """
-
- def __fetch_id_positions__(all_ids, target_ids):
- _vals = []
- _posit = [0] # alternative for parsing
-
- for (idx, strain) in enumerate(all_ids, 1):
- if strain in target_ids:
- _vals.append(target_ids[strain])
- _posit.append(idx)
-
- return (_posit, _vals)
-
- with open(file_path) as csv_file:
- csv_reader = csv.reader(csv_file, delimiter=',')
- _posit, sample_vals = __fetch_id_positions__(
- next(csv_reader)[1:], sample_dict)
- return (sample_vals, [[line[i] for i in _posit] for line in csv_reader])
-
-
- """
-
-
try:
- with lmdb.open(os.path.join("/tmp","Probesets"),readonly=True,lock=False) as env:
+ # use the configured TMPDIR instead of a hard-coded /tmp path
+ with lmdb.open(os.path.join(TMPDIR,"Probesets"),readonly=True,lock=False) as env:
with env.begin() as txn:
filename = __generate_file_name__ (dataset_name)
if filename:
- data = txn.get(filename.encode())
-
-
- col_ids = pickle.loads(data)["data"]
-
- data = pickle.loads(data)["strain_names"]
-
- return (col_ids,data)
-
- # parse
-
+ meta = pickle.loads(txn.get(filename.encode()))
+ return (meta["strain_names"],meta["data"])
return {}
-
except Exception as error:
- breakpoint()
return {}
+
def fetch_all_cached_metadata(dataset_name):
"""in a gvein dataset fetch all the traits metadata"""
file_name = generate_filename(dataset_name, suffix="metadata")
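
For reference, the new read path in pre_computes.py unpickles the cached payload once and returns both fields together, where the old code called pickle.loads twice on the same bytes. Below is a minimal standalone sketch of that pattern, assuming (as the code above implies) that the cache holds a pickled dict with "strain_names" and "data" keys per dataset; read_cached_probeset, tmpdir and key are illustrative names, not identifiers from the codebase:

import os
import pickle

import lmdb


def read_cached_probeset(tmpdir, key):
    """Return (strain_names, data) from the lmdb cache, or {} on a miss."""
    try:
        # readonly=True with lock=False lets multiple web workers read
        # the cache concurrently without taking a writer lock.
        with lmdb.open(os.path.join(tmpdir, "Probesets"),
                       readonly=True, lock=False) as env:
            with env.begin() as txn:
                raw = txn.get(key.encode())
                if raw is None:  # key not cached yet
                    return {}
                # a single unpickle replaces the two pickle.loads calls
                # the pre-commit code made on the same payload
                meta = pickle.loads(raw)
                return (meta["strain_names"], meta["data"])
    except lmdb.Error:  # e.g. the environment has not been created yet
        return {}

The committed version keeps a broad except Exception instead, which also covers the case where txn.get returns None for an unknown key.
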
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 0661fa42..ea63d244 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -257,12 +257,11 @@ def __compute_sample_corr__(
if not bool(sample_data):
return {}
if target_dataset.type == "ProbeSet" and start_vars.get("use_cache") == "true":
- with database_connection() as conn:
- results = read_lmdb_strain_files("ProbeSets",target_dataset.name)
- if results:
- (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
- return run_correlation(target_data, sample_vals,
- method, ",", corr_type, n_top)
+ results = read_lmdb_strain_files("ProbeSets",target_dataset.name)
+ if results:
+ (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
+ return run_correlation(target_data, sample_vals,
+ method, ",", corr_type, n_top)
target_dataset.get_trait_data(list(sample_data.keys()))
def __merge_key_and_values__(rows, current):
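
A note on the rust_correlation.py hunk: the database_connection() wrapper is dropped because the cached branch only touches lmdb, so opening a SQL connection there was wasted work. Reassembled from the hunk above, the post-commit cache branch reads as follows; the function signature and trailing lines are assumed from the visible context lines, and the elided remainder of the function is untouched by this commit:

def __compute_sample_corr__(start_vars, sample_data, target_dataset,
                            method, corr_type, n_top):  # signature assumed
    if not bool(sample_data):
        return {}
    # Fast path: ProbeSet targets opt into the lmdb cache via
    # use_cache=true and no longer open a database connection here.
    if target_dataset.type == "ProbeSet" and start_vars.get("use_cache") == "true":
        results = read_lmdb_strain_files("ProbeSets", target_dataset.name)
        if results:
            (sample_vals, target_data) = parse_lmdb_dataset(
                results[0], sample_data, results[1])
            return run_correlation(target_data, sample_vals,
                                   method, ",", corr_type, n_top)
    # Slow path: fall back to loading trait data from the database.
    target_dataset.get_trait_data(list(sample_data.keys()))
    ...  # remainder of the function, unchanged by this commit
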