diff options
author | zsloan | 2021-06-18 19:21:30 +0000 |
---|---|---|
committer | zsloan | 2021-06-18 19:21:30 +0000 |
commit | df8476115e580fa5dfbf0e2e9a8f6e5e39ae7b99 (patch) | |
tree | af0c520c51e9e5d506bdd527d9342813b74d8624 /wqflask | |
parent | fafce2f44087edf51756f0118054d1e3aa654273 (diff) | |
download | genenetwork2-df8476115e580fa5dfbf0e2e9a8f6e5e39ae7b99.tar.gz |
Reenable PCA for correlation matrix
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 124 |
1 file changed, 63 insertions, 61 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 9ac02ac5..e7b16e77 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -23,6 +23,9 @@ import math import random import string +import rpy2.robjects as ro +from rpy2.robjects.packages import importr + import numpy as np import scipy @@ -160,23 +163,22 @@ class CorrelationMatrix: for sample in self.all_sample_list: groups.append(1) - # Not doing PCA until rpy2 is excised self.pca_works = "False" - # try: - # corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - # corr_eigen_value, corr_eigen_vectors = sortEigenVectors( - # corr_result_eigen) - - # if self.do_PCA == True: - # self.pca_works = "True" - # self.pca_trait_ids = [] - # pca = self.calculate_pca( - # list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - # self.loadings_array = self.process_loadings() - # else: - # self.pca_works = "False" - # except: - # self.pca_works = "False" + try: + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + corr_result_eigen) + + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca( + list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" self.js_data = dict(traits=[trait.name for trait in self.traits], groups=groups, @@ -185,51 +187,51 @@ class CorrelationMatrix: samples=self.all_sample_list, sample_data=self.sample_data,) - # def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): - # base = importr('base') - # stats = importr('stats') - - # corr_results_to_list = robjects.FloatVector( - # [item for sublist in self.pca_corr_results for item in sublist]) - - # m = 
robjects.r.matrix(corr_results_to_list, nrow=len(cols)) - # eigen = base.eigen(m) - # pca = stats.princomp(m, cor="TRUE") - # self.loadings = pca.rx('loadings') - # self.scores = pca.rx('scores') - # self.scale = pca.rx('scale') - - # trait_array = zScore(self.trait_data_array) - # trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) - - # pca_traits = [] - # for i, vector in enumerate(trait_array_vectors): - # # ZS: Check if below check is necessary - # # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): - # pca_traits.append((vector * -1.0).tolist()) - - # this_group_name = self.trait_list[0][1].group.name - # temp_dataset = data_set.create_dataset( - # dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) - # temp_dataset.group.get_samplelist() - # for i, pca_trait in enumerate(pca_traits): - # trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ - # this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") - # this_vals_string = "" - # position = 0 - # for sample in temp_dataset.group.all_samples_ordered(): - # if sample in self.shared_samples_list: - # this_vals_string += str(pca_trait[position]) - # this_vals_string += " " - # position += 1 - # else: - # this_vals_string += "x " - # this_vals_string = this_vals_string[:-1] - - # Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) - # self.pca_trait_ids.append(trait_id) - - # return pca + def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): + base = importr('base') + stats = importr('stats') + + corr_results_to_list = ro.FloatVector( + [item for sublist in self.pca_corr_results for item in sublist]) + + m = ro.r.matrix(corr_results_to_list, nrow=len(cols)) + eigen = base.eigen(m) + pca = stats.princomp(m, cor="TRUE") + self.loadings = pca.rx('loadings') + self.scores = pca.rx('scores') + self.scale = pca.rx('scale') + + trait_array = zScore(self.trait_data_array) + trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) + 
+ pca_traits = [] + for i, vector in enumerate(trait_array_vectors): + # ZS: Check if below check is necessary + # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): + pca_traits.append((vector * -1.0).tolist()) + + this_group_name = self.trait_list[0][1].group.name + temp_dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) + temp_dataset.group.get_samplelist() + for i, pca_trait in enumerate(pca_traits): + trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + this_vals_string = "" + position = 0 + for sample in temp_dataset.group.all_samples_ordered(): + if sample in self.shared_samples_list: + this_vals_string += str(pca_trait[position]) + this_vals_string += " " + position += 1 + else: + this_vals_string += "x " + this_vals_string = this_vals_string[:-1] + + Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) + self.pca_trait_ids.append(trait_id) + + return pca def process_loadings(self): loadings_array = [] |