From 9fedd52b1517c73ffeb19e90a2436e29d80bdd7a Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 26 Nov 2019 16:00:36 -0600
Subject: Added biweight correlation option; sub-options for it probably still
 need to be added

---
 wqflask/wqflask/correlation/show_corr_results.py   | 70 +++++++++++++++-------
 .../show_trait_calculate_correlations.html         |  1 +
 2 files changed, 49 insertions(+), 22 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index be27da7b..0654e5d3 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -34,6 +34,11 @@ import json
 
 import scipy
 import numpy
+import rpy2.robjects as ro                    # R Objects
+import rpy2.rinterface as ri
+
+from rpy2.robjects.packages import importr
+utils = importr("utils")
 
 from pprint import pformat as pf
 
@@ -258,6 +263,8 @@ class CorrelationResults(object):
             self.formatted_corr_type += "(Pearson's r)"
         elif self.corr_method == "spearman":
             self.formatted_corr_type += "(Spearman's rho)"
+        elif self.corr_method == "bicor":
+            self.formatted_corr_type += "(Biweight r)"
 
     def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1):
         """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each"""
@@ -446,7 +453,9 @@ class CorrelationResults(object):
         self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals)
 
         #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
-        if self.corr_method == 'pearson':
+        if self.corr_method == 'bicor':
+            sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals)
+        elif self.corr_method == 'pearson':
             sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals)
         else:
             sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals)
@@ -469,6 +478,22 @@ class CorrelationResults(object):
                     if not value.strip().lower() == 'x':
                         self.sample_data[str(sample)] = float(value)
 
+def do_bicor(this_trait_vals, target_trait_vals):
+    """Return (r, p) as numpy arrays from the R function bicorAndPvalue (looked up after loading WGCNA) via rpy2."""
+    r_library = ro.r["library"]             # Handle to R's library()
+    r_options = ro.r["options"]             # Handle to R's options()
+    # library("WGCNA") runs first; bicorAndPvalue is looked up by name only afterwards (presumably WGCNA provides it).
+    r_library("WGCNA")
+    r_bicor = ro.r["bicorAndPvalue"]        # Handle to bicorAndPvalue
+
+    r_options(stringsAsFactors = False)
+    # NOTE(review): assumes ro.Vector turns these Python sequences into numeric R vectors -- confirm.
+    this_vals = ro.Vector(this_trait_vals)
+    target_vals = ro.Vector(target_trait_vals)
+    # The R result is unpacked as five elements, each passed through numpy.asarray; only the first two are returned.
+    the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)]
+    return the_r, the_p
+
 def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False):
     results_list = []
     for i, trait in enumerate(corr_results):
@@ -549,19 +574,19 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
 
 def get_header_fields(data_type, corr_method):
     if data_type == "ProbeSet":
-        if corr_method == "pearson":
+        if corr_method == "spearman":
             header_fields = ['Index',
                                 'Record',
                                 'Symbol',
                                 'Description',
                                 'Location',
                                 'Mean',
-                                'Sample r',
+                                'Sample rho',
                                 'N',
-                                'Sample p(r)',
-                                'Lit r',
-                                'Tissue r',
-                                'Tissue p(r)',
+                                'Sample p(rho)',
+                                'Lit rho',
+                                'Tissue rho',
+                                'Tissue p(rho)',
                                 'Max LRS',
                                 'Max LRS Location',
                                 'Additive Effect']
@@ -572,25 +597,25 @@ def get_header_fields(data_type, corr_method):
                                 'Description',
                                 'Location',
                                 'Mean',
-                                'Sample rho',
+                                'Sample r',
                                 'N',
-                                'Sample p(rho)',
-                                'Lit rho',
-                                'Tissue rho',
-                                'Tissue p(rho)',
+                                'Sample p(r)',
+                                'Lit r',
+                                'Tissue r',
+                                'Tissue p(r)',
                                 'Max LRS',
                                 'Max LRS Location',
                                 'Additive Effect']
     elif data_type == "Publish":
-        if corr_method == "pearson":
+        if corr_method == "spearman":
             header_fields = ['Index',
                             'Record',
                             'Description',
                             'Authors',
                             'Year',
-                            'Sample r',
+                            'Sample rho',
                             'N',
-                            'Sample p(r)',
+                            'Sample p(rho)',
                             'Max LRS',
                             'Max LRS Location',
                             'Additive Effect']
@@ -600,26 +625,27 @@ def get_header_fields(data_type, corr_method):
                             'Description',
                             'Authors',
                             'Year',
-                            'Sample rho',
+                            'Sample r',
                             'N',
-                            'Sample p(rho)',
+                            'Sample p(r)',
                             'Max LRS',
                             'Max LRS Location',
                             'Additive Effect']
+
     else:
-        if corr_method == "pearson":
+        if corr_method == "spearman":
             header_fields = ['Index',
                                 'ID',
                                 'Location',
-                                'Sample r',
+                                'Sample rho',
                                 'N',
-                                'Sample p(r)']
+                                'Sample p(rho)']
         else:
             header_fields = ['Index',
                                 'ID',
                                 'Location',
-                                'Sample rho',
+                                'Sample r',
                                 'N',
-                                'Sample p(rho)']
+                                'Sample p(r)']
 
     return header_fields
\ No newline at end of file
diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index 8ceec4fa..297d62ce 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -70,6 +70,7 @@
                 <select name="corr_sample_method" class="form-control">
                     <option value="pearson">Pearson</option>
                     <option value="spearman">Spearman Rank</option>
+                    <option value="bicor">Biweight Midcorrelation</option>
                 </select>
             </div>
         </div>
-- 
cgit 1.4.1