-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py      44
-rw-r--r--  wqflask/wqflask/correlation/rust_correlation.py  11
2 files changed, 10 insertions(+), 45 deletions(-)
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 7a1690dd..ddcc5ba9 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -64,8 +64,6 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
         ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
         return str.translate(filename, ttable)
 
-
-
     def __generate_file_name__(db_name):     
         # todo add expiry time and checker
 
@@ -77,51 +75,19 @@ def read_lmdb_strain_files(dataset_type,dataset_name,sql_uri=SQL_URI):
                 if (results):
                     return __sanitise_filename__(
                         f"ProbeSetFreezeId_{results[0]}_{results[1]}")
-    """
-
-    def __fetch_id_positions__(all_ids, target_ids):
-        _vals = []
-        _posit = [0]  # alternative for parsing
-
-        for (idx, strain) in enumerate(all_ids, 1):
-            if strain in target_ids:
-                _vals.append(target_ids[strain])
-                _posit.append(idx)
-
-        return (_posit, _vals)
-
-    with open(file_path) as csv_file:
-        csv_reader = csv.reader(csv_file, delimiter=',')
-        _posit, sample_vals = __fetch_id_positions__(
-            next(csv_reader)[1:], sample_dict)
-        return (sample_vals, [[line[i] for i in _posit] for line in csv_reader])
-
-
-    """
-
-
     try:
-        with lmdb.open(os.path.join("/tmp","Probesets"),readonly=True,lock=False) as env:
+        # read the cache from the configured TMPDIR rather than a hard-coded /tmp
+        with lmdb.open(os.path.join(TMPDIR, "Probesets"), readonly=True, lock=False) as env:
             with env.begin() as txn:
                 filename = __generate_file_name__ (dataset_name)
                 if filename:
-                    data = txn.get(filename.encode())
-  
-
-                    col_ids = pickle.loads(data)["data"]
-
-                    data = pickle.loads(data)["strain_names"]
-
-                    return (col_ids,data)
-
-                    # parse 
-               
+                    meta = pickle.loads(txn.get(filename.encode()))
+                    return (meta["strain_names"], meta["data"])
                 return {}
-
     except Exception as error:
-        breakpoint()
         return {}
 
+
 def fetch_all_cached_metadata(dataset_name):
     """in a gvein dataset fetch all the traits metadata"""
     file_name = generate_filename(dataset_name, suffix="metadata")
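
For context, a minimal sketch of the writer side this reader expects, assuming the cache is keyed by the sanitised "ProbeSetFreezeId_{id}_{name}" string and each value is a pickled dict with "strain_names" and "data" entries. The helper name write_lmdb_strain_file and the map_size are assumptions, not part of this commit:

    import os
    import pickle
    import lmdb

    TMPDIR = os.environ.get("TMPDIR", "/tmp")  # assumption: mirrors the app config

    def write_lmdb_strain_file(dataset_key, strain_names, data):
        # one pickled dict per dataset; map_size caps the database size (8 GiB here)
        with lmdb.open(os.path.join(TMPDIR, "Probesets"), map_size=8 << 30) as env:
            with env.begin(write=True) as txn:
                txn.put(dataset_key.encode(),
                        pickle.dumps({"strain_names": strain_names, "data": data}))
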
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 0661fa42..ea63d244 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -257,12 +257,11 @@ def __compute_sample_corr__(
     if not bool(sample_data):
         return {}
     if target_dataset.type == "ProbeSet" and start_vars.get("use_cache") == "true":
-        with database_connection() as conn:
-            results = read_lmdb_strain_files("ProbeSets",target_dataset.name)
-            if results:
-                (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
-                return run_correlation(target_data, sample_vals,
-                                   method, ",", corr_type, n_top)
+        results = read_lmdb_strain_files("ProbeSets", target_dataset.name)
+        if results:
+            (sample_vals, target_data) = parse_lmdb_dataset(results[0], sample_data, results[1])
+            return run_correlation(target_data, sample_vals,
+                                   method, ",", corr_type, n_top)
     target_dataset.get_trait_data(list(sample_data.keys()))
 
     def __merge_key_and_values__(rows, current):
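
parse_lmdb_dataset itself is not shown in this diff. A plausible sketch, inferred from its call site here and from the commented-out CSV variant deleted in pre_computes.py, aligns the cached strain columns with the strains present in the current request; the assumption that data maps each trait name to its per-strain row of values is not confirmed by this commit:

    def parse_lmdb_dataset(strain_names, sample_dict, data):
        def __fetch_id_positions__(all_ids, target_ids):
            _posit, _vals = [], []
            for (idx, strain) in enumerate(all_ids):
                if strain in target_ids:
                    _posit.append(idx)
                    _vals.append(target_ids[strain])
            return (_posit, _vals)

        _posit, sample_vals = __fetch_id_positions__(strain_names, sample_dict)
        # project every cached trait row onto the matching strain columns,
        # keeping the trait name as the first element of each row
        return (sample_vals,
                [[trait, *[row[i] for i in _posit]]
                 for (trait, row) in data.items()])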