-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py     | 59
-rw-r--r--  wqflask/wqflask/correlation/rust_correlation.py |  5
2 files changed, 41 insertions(+), 23 deletions(-)
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index ddcc5ba9..c995b471 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -16,34 +16,53 @@ from utility.tools import SQL_URI
 from json.decoder import JSONDecodeError
-def cache_trait_metadata(dataset_name, data):
+def to_generate_datasets(dataset_name, dataset_type, gen_type, species="mouse"):
+    """Queue a dataset in the LMDB-backed to-do list of items to (re)generate."""
+    try:
+        with lmdb.open(os.path.join("/tmp", "todolist_generate"), map_size=20971520) as env:
+            with env.begin(write=True) as txn:
+                data = txn.get(f"{gen_type}:{dataset_type}".encode())
+                if data:
+                    data = pickle.loads(data)
+                    data[dataset_name] = (
+                        dataset_type, dataset_name, species)
+                else:
+                    data = {dataset_name: (
+                        dataset_type, dataset_name, species)}
+
+                txn.put(f"{gen_type}:{dataset_type}".encode(), pickle.dumps(data))
+    except Exception:
+        # queueing is best-effort; a failure here must not break the caller
+        pass
+
+
+def cache_trait_metadata(dataset_name, data):
     try:
-        with lmdb.open(os.path.join(TMPDIR,f"metadata_{dataset_name}"),map_size=20971520) as env:
-            with env.begin(write=True) as txn:
+        with lmdb.open(os.path.join(TMPDIR, f"metadata_{dataset_name}"), map_size=20971520) as env:
+            with env.begin(write=True) as txn:
                 data_bytes = pickle.dumps(data)
                 txn.put(f"{dataset_name}".encode(), data_bytes)
                 current_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                 txn.put(b"creation_date", current_date.encode())
                 return "success"
-    except lmdb.Error as error:
+    except lmdb.Error as error:
         pass
-def read_trait_metadata(dataset_name,dataset_type):
+
+def read_trait_metadata(dataset_name, dataset_type):
     try:
-        with lmdb.open(os.path.join("/tmp/",f"metadata_{dataset_type}"), readonly=True, lock=False) as env:
+        with lmdb.open(os.path.join("/tmp/", f"metadata_{dataset_type}"), readonly=True, lock=False) as env:
             with env.begin() as txn:
-                metadata = txn.get(dataset_name.encode())
+                metadata = txn.get(dataset_name.encode())
                 return (pickle.loads(metadata)["data"] if metadata else {})
     except lmdb.Error as error:
         return {}
-
-def parse_lmdb_dataset(strain_names,target_strains,data):
-    _vals = []
+def parse_lmdb_dataset(strain_names, target_strains, data):
+    _vals = []
     _posit = [0]
+
     def __fetch_id_positions__(all_ids, target_ids):
         _vals = []
         _posit = [0]  # alternative for parsing
@@ -54,17 +73,18 @@ def parse_lmdb_dataset(strain_names,target_strains,data):
                 _posit.append(idx)
         return (_posit, _vals)
-    _posit,sample_vals = __fetch_id_positions__(strain_names,target_strains)
-    return (sample_vals,[[line[i] for i in _posit] for line in data.values()])
+    _posit, sample_vals = __fetch_id_positions__(strain_names, target_strains)
+    return (sample_vals, [[line[i] for i in _posit] for line in data.values()])
+
-def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
+def read_lmdb_strain_files(dataset_type, dataset_name, sql_uri=SQL_URI):
     # target file path, e.g. for a ProbeSet; the db name is used to generate the filename
     def __sanitise_filename__(filename):
         ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
         return str.translate(filename, ttable)
-    def __generate_file_name__(db_name):
+    def __generate_file_name__(db_name):
         # TODO: add expiry time and checker
         with database_connection() as conn:
@@ -77,12 +97,12 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
                         f"ProbeSetFreezeId_{results[0]}_{results[1]}")
     try:
         # change this to tmpdir
-        with lmdb.open(os.path.join(TMPDIR,"Probesets"),readonly=True,lock=False) as env:
+        with lmdb.open(os.path.join(TMPDIR, "Probesets"), readonly=True, lock=False) as env:
             with env.begin() as txn:
-                filename = __generate_file_name__ (dataset_name)
+                filename = __generate_file_name__(dataset_name)
                 if filename:
                     meta = pickle.loads(txn.get(filename.encode()))
-                    return (meta["strain_names"],meta["data"])
+                    return (meta["strain_names"], meta["data"])
                 return {}
     except Exception as error:
         return {}
@@ -130,8 +150,6 @@ def generate_filename(*args, suffix="", file_ext="json"):
     return f"{hashlib.md5(string_unicode).hexdigest()}_{suffix}.{file_ext}"
-
-
 def fetch_text_file(dataset_name, conn, text_dir=TMPDIR):
     """fetch text files with strain values, if they exist"""
@@ -154,9 +172,6 @@ def fetch_text_file(dataset_name, conn, text_dir=TMPDIR):
         pass
-
-
-
 def read_text_file(sample_dict, file_path):
     def __fetch_id_positions__(all_ids, target_ids):
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index ea63d244..5f024440 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -16,7 +16,7 @@ from wqflask.correlation.pre_computes import write_db_to_textfile
 from wqflask.correlation.pre_computes import read_trait_metadata
 from wqflask.correlation.pre_computes import cache_trait_metadata
 from wqflask.correlation.pre_computes import parse_lmdb_dataset
-
+from wqflask.correlation.pre_computes import to_generate_datasets
 from wqflask.correlation.pre_computes import read_lmdb_strain_files
 from gn3.computations.correlations import compute_all_lit_correlation
 from gn3.computations.rust_correlation import run_correlation
@@ -73,6 +73,7 @@ def get_metadata(dataset, traits):
     if cached_metadata:
         return {trait:cached_metadata.get(trait) for trait in traits}
     else:
+        to_generate_datasets(dataset.name, "ProbeSet", "metadata")
         return {**({trait_name: {
             "name": trait_name,
             "view": True,
@@ -262,6 +263,8 @@ def __compute_sample_corr__(
         (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
         return run_correlation(target_data, sample_vals,
                                method, ",", corr_type, n_top)
+    else:
+        to_generate_datasets(target_dataset.name, "ProbeSet", "textfile", target_dataset.group.species)
     target_dataset.get_trait_data(list(sample_data.keys()))
 
     def __merge_key_and_values__(rows, current):
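
Both call sites follow the same write-behind pattern: on a cache miss (get_metadata finding no cached trait metadata, or __compute_sample_corr__ finding no precomputed LMDB strain file), the dataset is recorded in the to-do list and execution falls through to the existing slow path. A quick interactive check of the queueing behaviour, assuming the gn2 modules are importable (the dataset names here are arbitrary examples):

import pickle

import lmdb

from wqflask.correlation.pre_computes import to_generate_datasets

# two misses against the same (gen_type, dataset_type) key accumulate in one dict
to_generate_datasets("DatasetA", "ProbeSet", "textfile", species="mouse")
to_generate_datasets("DatasetB", "ProbeSet", "textfile", species="rat")

with lmdb.open("/tmp/todolist_generate", readonly=True, lock=False) as env:
    with env.begin() as txn:
        queued = pickle.loads(txn.get(b"textfile:ProbeSet"))

print(queued)
# {'DatasetA': ('ProbeSet', 'DatasetA', 'mouse'),
#  'DatasetB': ('ProbeSet', 'DatasetB', 'rat')}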