From fa8ef3e466e3919648e1d4cf9c38ed30328fc7a6 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Tue, 16 Aug 2022 12:11:36 +0300
Subject: minor fixes for computing all correlations

---
 wqflask/wqflask/correlation/rust_correlation.py | 69 ++++++++++++++-----------
 1 file changed, 38 insertions(+), 31 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 94720f54..2a2ad4a0 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -39,15 +39,14 @@ def chunk_dataset(dataset,steps,name):
         strains = [trait_name] + [str(value) for (trait_name, strain, value) in matrix]
         results.append(",".join(strains))
 
-    breakpoint()
     return results
 
 
 def compute_top_n_sample(start_vars, dataset, trait_list):
-    """only if dataset is of type probeset"""
-
-
+    """Compute Pearson sample correlations for the traits in trait_list; returns {} unless the dataset type is Probeset."""
 
+    if dataset.type!= "Probeset":
+        return  {}
 
     def __fetch_sample_ids__(samples_vals, samples_group):
 
@@ -73,19 +72,9 @@ def compute_top_n_sample(start_vars, dataset, trait_list):
 
             )
 
-            return dict(curr.fetchall())
-
-
-
-
-
+            return (sample_data,dict(curr.fetchall()))
 
-
-
-
-  
-
-    ty = __fetch_sample_ids__(start_vars["sample_vals"], start_vars["corr_samples_group"])
+    (sample_data,sample_ids) = __fetch_sample_ids__(start_vars["sample_vals"], start_vars["corr_samples_group"])
 
 
 
@@ -93,6 +82,8 @@ def compute_top_n_sample(start_vars, dataset, trait_list):
 
         curr = conn.cursor()
 
+        # Fetch the strain data for every trait in trait_list with a single bulk query.
+
         curr.execute(
 
         """
@@ -104,15 +95,14 @@ def compute_top_n_sample(start_vars, dataset, trait_list):
             and ProbeSetFreeze.Name = '{}'
             and ProbeSet.Name in {}
             and ProbeSet.Id = ProbeSetXRef.ProbeSetId)
-           """.format(create_in_clause(list(ty.values())),dataset.name,create_in_clause(trait_list))
+           """.format(create_in_clause(list(sample_ids.values())),dataset.name,create_in_clause(trait_list))
 
 
         )
 
+        corr_data = chunk_dataset(list(curr.fetchall()),len(sample_ids.values()),dataset.name)
 
-
-
-        return chunk_dataset(list(curr.fetchall()),len(ty.values()),dataset.name)
+        return run_correlation(corr_data,list(sample_data.values()),"pearson",",")
 
 
 def compute_top_n_lit(corr_results, this_dataset, this_trait) -> dict:
@@ -170,7 +160,10 @@ def merge_results(dict_a: dict, dict_b: dict, dict_c: dict) -> list[dict]:
                 **dict_c.get(trait_name, {})
             }
         }
-    return [__merge__(tname, tcorrs) for tname, tcorrs in dict_a.items()]
+    results = [__merge__(tname, tcorrs) for tname, tcorrs in dict_a.items()]
+
+
+    return results
 
 
 def __compute_sample_corr__(
@@ -249,27 +242,41 @@ def compute_correlation_rust(
     }
     results = corr_type_fns[corr_type](
         start_vars, corr_type, method, n_top, target_trait_info)
+
     # END: Replace this with `match ...` once we hit Python 3.10
 
-    top_tissue_results = {}
-    top_lit_results = {}
 
+    top_a = top_b = {}
 
-    results = compute_top_n_sample(start_vars,target_dataset,list(results.keys()))
+    if compute_all:
 
+        if corr_type == "sample":
 
+            top_a = compute_top_n_tissue(
+            this_dataset, this_trait, results, method)
+        
+            top_b = compute_top_n_lit(results, this_dataset, this_trait)
 
-    breakpoint()
 
-    if compute_all:
-        # example compute of compute both correlation
-        top_tissue_results = compute_top_n_tissue(
+        elif corr_type == "lit":
+
+            # FIXME: computing the additional correlations currently fails when corr_type is "lit".
+
+            top_a = compute_top_n_sample(start_vars,target_dataset,list(results.keys()))
+            top_b =  compute_top_n_tissue(
             this_dataset, this_trait, results, method)
-        top_lit_results = compute_top_n_lit(results, this_dataset, this_trait)
 
-    return {
+        else:
+
+            top_a = compute_top_n_sample(start_vars,target_dataset,list(results.keys()))
+
+            top_b = compute_top_n_lit(results, this_dataset, this_trait)
+
+
+
+    return  {
         "correlation_results": merge_results(
-            results, top_tissue_results, top_lit_results),
+            results, top_a, top_b),
         "this_trait": this_trait.name,
         "target_dataset": start_vars['corr_dataset'],
         "return_results": n_top
-- 
cgit v1.2.3