diff options
3 files changed, 79 insertions, 77 deletions
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 2f3df67a..f1cf3733 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -22,7 +22,7 @@ import collections import json import scipy import numpy -# import rpy2.robjects as ro # R Objects +import rpy2.robjects as ro # R Objects import utility.logger import utility.webqtlUtil @@ -459,9 +459,9 @@ class CorrelationResults: if num_overlap > 5: # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ - # if self.corr_method == 'bicor': - # sample_r, sample_p = do_bicor( - # self.this_trait_vals, target_vals) + if self.corr_method == 'bicor': + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) if self.corr_method == 'pearson': sample_r, sample_p = scipy.stats.pearsonr( self.this_trait_vals, target_vals) @@ -487,22 +487,22 @@ class CorrelationResults: self.sample_data[str(sample)] = float(value) -# def do_bicor(this_trait_vals, target_trait_vals): -# r_library = ro.r["library"] # Map the library function -# r_options = ro.r["options"] # Map the options function +def do_bicor(this_trait_vals, target_trait_vals): + r_library = ro.r["library"] # Map the library function + r_options = ro.r["options"] # Map the options function -# r_library("WGCNA") -# r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function + r_library("WGCNA") + r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function -# r_options(stringsAsFactors=False) + r_options(stringsAsFactors=False) -# this_vals = ro.Vector(this_trait_vals) -# target_vals = ro.Vector(target_trait_vals) + this_vals = ro.FloatVector(this_trait_vals) + target_vals = ro.FloatVector(target_trait_vals) -# the_r, the_p, _fisher_transform, _the_t, _n_obs = [ -# numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] -# return the_r, the_p + return the_r, the_p def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False): diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 9ac02ac5..e7b16e77 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -23,6 +23,9 @@ import math import random import string +import rpy2.robjects as ro +from rpy2.robjects.packages import importr + import numpy as np import scipy @@ -160,23 +163,22 @@ class CorrelationMatrix: for sample in self.all_sample_list: groups.append(1) - # Not doing PCA until rpy2 is excised self.pca_works = "False" - # try: - # corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - # corr_eigen_value, corr_eigen_vectors = sortEigenVectors( - # corr_result_eigen) - - # if self.do_PCA == True: - # self.pca_works = "True" - # self.pca_trait_ids = [] - # pca = self.calculate_pca( - # list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - # self.loadings_array = self.process_loadings() - # else: - # self.pca_works = "False" - # except: - # self.pca_works = "False" + try: + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + corr_result_eigen) + + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca( + list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" self.js_data = dict(traits=[trait.name for trait in self.traits], groups=groups, @@ -185,51 +187,51 @@ class CorrelationMatrix: samples=self.all_sample_list, sample_data=self.sample_data,) - # def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): - # base = importr('base') - # stats = importr('stats') - - # corr_results_to_list = robjects.FloatVector( - # [item for sublist in self.pca_corr_results for item in sublist]) - - # m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) - # eigen = base.eigen(m) - # pca = stats.princomp(m, cor="TRUE") - # self.loadings = pca.rx('loadings') - # self.scores = pca.rx('scores') - # self.scale = pca.rx('scale') - - # trait_array = zScore(self.trait_data_array) - # trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) - - # pca_traits = [] - # for i, vector in enumerate(trait_array_vectors): - # # ZS: Check if below check is necessary - # # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): - # pca_traits.append((vector * -1.0).tolist()) - - # this_group_name = self.trait_list[0][1].group.name - # temp_dataset = data_set.create_dataset( - # dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) - # temp_dataset.group.get_samplelist() - # for i, pca_trait in enumerate(pca_traits): - # trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ - # this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") - # this_vals_string = "" - # position = 0 - # for sample in temp_dataset.group.all_samples_ordered(): - # if sample in self.shared_samples_list: - # this_vals_string += str(pca_trait[position]) - # this_vals_string += " " - # position += 1 - # else: - # this_vals_string += "x " - # this_vals_string = this_vals_string[:-1] - - # Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) - # self.pca_trait_ids.append(trait_id) - - # return pca + def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): + base = importr('base') + stats = importr('stats') + + corr_results_to_list = ro.FloatVector( + [item for sublist in self.pca_corr_results for item in sublist]) + + m = ro.r.matrix(corr_results_to_list, nrow=len(cols)) + eigen = base.eigen(m) + pca = stats.princomp(m, cor="TRUE") + self.loadings = pca.rx('loadings') + self.scores = pca.rx('scores') + self.scale = pca.rx('scale') + + trait_array = zScore(self.trait_data_array) + trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) + + pca_traits = [] + for i, vector in enumerate(trait_array_vectors): + # ZS: Check if below check is necessary + # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): + pca_traits.append((vector * -1.0).tolist()) + + this_group_name = self.trait_list[0][1].group.name + temp_dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) + temp_dataset.group.get_samplelist() + for i, pca_trait in enumerate(pca_traits): + trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + this_vals_string = "" + position = 0 + for sample in temp_dataset.group.all_samples_ordered(): + if sample in self.shared_samples_list: + this_vals_string += str(pca_trait[position]) + this_vals_string += " " + position += 1 + else: + this_vals_string += "x " + this_vals_string = this_vals_string[:-1] + + Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) + self.pca_trait_ids.append(trait_id) + + return pca def process_loadings(self): loadings_array = [] diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index e623a968..59f9b47c 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -70,7 +70,7 @@ <select name="corr_sample_method" class="form-control"> <option value="pearson">Pearson</option> <option value="spearman">Spearman Rank</option> - <!-- <option value="bicor">Biweight Midcorrelation</option> --> + <option value="bicor">Biweight Midcorrelation</option> </select> </div> </div> |