about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlexander_Kabui2023-05-02 23:58:58 +0300
committerAlexander_Kabui2023-05-02 23:58:58 +0300
commit37a4910662ff412e4853001ee2bbe3037f4848a9 (patch)
treedf1285af99ea0588b1e5d0443b21c533507f9be9
parentb6cd1ccf160bf41a2e1f235e92f44b4dbee2f593 (diff)
downloadgenenetwork2-37a4910662ff412e4853001ee2bbe3037f4848a9.tar.gz
integrate lmdb code;remove textfiles fetching
-rw-r--r--wqflask/wqflask/correlation/rust_correlation.py38
1 files changed, 14 insertions, 24 deletions
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 18f8c622..0661fa42 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -15,6 +15,9 @@ from wqflask.correlation.pre_computes import read_text_file
 from wqflask.correlation.pre_computes import write_db_to_textfile
 from wqflask.correlation.pre_computes import read_trait_metadata
 from wqflask.correlation.pre_computes import cache_trait_metadata
+from wqflask.correlation.pre_computes import  parse_lmdb_dataset
+
+from wqflask.correlation.pre_computes import read_lmdb_strain_files
 from gn3.computations.correlations import compute_all_lit_correlation
 from gn3.computations.rust_correlation import run_correlation
 from gn3.computations.rust_correlation import get_sample_corr_data
@@ -30,7 +33,7 @@ def query_probes_metadata(dataset, trait_list):
     if not bool(trait_list) or dataset.type != "ProbeSet":
         return []
 
-    with database_connection(SQL_URI) as conn:
+    with database_connection() as conn:
         with conn.cursor() as cursor:
 
             query = """
@@ -103,7 +106,7 @@ def chunk_dataset(dataset, steps, name):
                   ProbeSetXRef.ProbeSetId = ProbeSet.Id
     """.format(name)
 
-    with database_connection(SQL_URI) as conn:
+    with database_connection() as conn:
         with conn.cursor() as curr:
             curr.execute(query)
             traits_name_dict = dict(curr.fetchall())
@@ -127,7 +130,7 @@ def compute_top_n_sample(start_vars, dataset, trait_list):
             sample_data=json.loads(samples_vals),
             dataset_samples=dataset.group.all_samples_ordered())
 
-        with database_connection(SQL_URI) as conn:
+        with database_connection() as conn:
             with conn.cursor() as curr:
                 curr.execute(
                     """
@@ -145,7 +148,7 @@ def compute_top_n_sample(start_vars, dataset, trait_list):
     if len(trait_list) == 0:
         return {}
 
-    with database_connection(SQL_URI) as conn:
+    with database_connection() as conn:
         with conn.cursor() as curr:
             # fetching strain data in bulk
             query = (
@@ -181,7 +184,7 @@ def compute_top_n_lit(corr_results, target_dataset, this_trait) -> dict:
     geneid_dict = {trait_name: geneid for (trait_name, geneid)
                    in geneid_dict.items() if
                    corr_results.get(trait_name)}
-    with database_connection(SQL_URI) as conn:
+    with database_connection() as conn:
         return reduce(
             lambda acc, corr: {**acc, **corr},
             compute_all_lit_correlation(
@@ -253,26 +256,13 @@ def __compute_sample_corr__(
 
     if not bool(sample_data):
         return {}
-
     if target_dataset.type == "ProbeSet" and start_vars.get("use_cache") == "true":
-        with database_connection(SQL_URI) as conn:
-            file_path = fetch_text_file(target_dataset.name, conn)
-            if file_path:
-                (sample_vals, target_data) = read_text_file(
-                    sample_data, file_path)
-
+        with database_connection() as conn:
+            results = read_lmdb_strain_files("ProbeSets",target_dataset.name)
+            if results:
+                (sample_vals,target_data) = parse_lmdb_dataset(results[0],sample_data,results[1])
                 return run_correlation(target_data, sample_vals,
-                                       method, ",", corr_type, n_top)
-
-            write_db_to_textfile(target_dataset.name, conn)
-            file_path = fetch_text_file(target_dataset.name, conn)
-            if file_path:
-                (sample_vals, target_data) = read_text_file(
-                    sample_data, file_path)
-
-                return run_correlation(target_data, sample_vals,
-                                       method, ",", corr_type, n_top)
-
+                                   method, ",", corr_type, n_top)
     target_dataset.get_trait_data(list(sample_data.keys()))
 
     def __merge_key_and_values__(rows, current):
@@ -336,7 +326,7 @@ def __compute_lit_corr__(
     (this_trait_geneid, geneid_dict, species) = do_lit_correlation(
         this_trait, target_dataset)
 
-    with database_connection(SQL_URI) as conn:
+    with database_connection() as conn:
         return reduce(
             lambda acc, lit: {**acc, **lit},
             compute_all_lit_correlation(