diff options
-rw-r--r-- | wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 210 |
1 files changed, 101 insertions, 109 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index a394f548..0269ce68 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -55,11 +55,7 @@ class CorrelationMatrix(object): self.do_PCA = True this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop for trait_db in self.trait_list: - if trait_db[1].group.name != this_group: - self.insufficient_shared_samples = True - break - else: - this_group = trait_db[1].group.name + this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) this_sample_data = this_trait.data @@ -68,119 +64,115 @@ class CorrelationMatrix(object): if sample not in self.all_sample_list: self.all_sample_list.append(sample) - if self.insufficient_shared_samples: - pass - else: - self.sample_data = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_sample_data = this_trait.data + self.sample_data = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_sample_data = this_trait.data - this_trait_vals = [] - for sample in self.all_sample_list: - if sample in this_sample_data: - this_trait_vals.append(this_sample_data[sample].value) - else: - this_trait_vals.append('') - self.sample_data.append(this_trait_vals) - - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples - self.do_PCA = False - - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) - - self.corr_results = [] - self.pca_corr_results = [] - self.shared_samples_list = self.all_sample_list - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data - - corr_result_row = [] - pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal - for target in self.trait_list: - target_trait = target[0] - target_db = target[1] - target_samples = target_db.group.all_samples_ordered() - target_sample_data = target_trait.data - - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(target_samples): - if (sample in this_sample_data) and (sample in target_sample_data): - sample_value = this_sample_data[sample].value - target_sample_value = target_sample_data[sample].value - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - else: - if sample in self.shared_samples_list: - self.shared_samples_list.remove(sample) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - - if num_overlap < self.lowest_overlap: - self.lowest_overlap = num_overlap - if num_overlap < 2: - corr_result_row.append([target_trait, 0, num_overlap]) - pca_corr_result_row.append(0) - else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - if is_spearman == False: - sample_r, sample_p = pearson_r, pearson_p - if sample_r == 1: - is_spearman = True - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - corr_result_row.append([target_trait, sample_r, num_overlap]) - pca_corr_result_row.append(pearson_r) - - self.corr_results.append(corr_result_row) - self.pca_corr_results.append(pca_corr_result_row) - - self.trait_data_array = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data + this_trait_vals = [] + for sample in self.all_sample_list: + if sample in this_sample_data: + this_trait_vals.append(this_sample_data[sample].value) + else: + this_trait_vals.append('') + self.sample_data.append(this_trait_vals) + + if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + self.do_PCA = False + + self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + + self.corr_results = [] + self.pca_corr_results = [] + self.shared_samples_list = self.all_sample_list + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + corr_result_row = [] + pca_corr_result_row = [] + is_spearman = False #ZS: To determine if it's above or below the diagonal + for target in self.trait_list: + target_trait = target[0] + target_db = target[1] + target_samples = target_db.group.all_samples_ordered() + target_sample_data = target_trait.data this_trait_vals = [] - for index, sample in enumerate(this_db_samples): - if (sample in this_sample_data) and (sample in self.shared_samples_list): + target_vals = [] + for index, sample in enumerate(target_samples): + if (sample in this_sample_data) and (sample in target_sample_data): sample_value = this_sample_data[sample].value + target_sample_value = target_sample_data[sample].value this_trait_vals.append(sample_value) - self.trait_data_array.append(this_trait_vals) + target_vals.append(target_sample_value) + else: + if sample in self.shared_samples_list: + self.shared_samples_list.remove(sample) - corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - groups = [] - for sample in self.all_sample_list: - groups.append(1) - - try: - if self.do_PCA == True: - self.pca_works = "True" - self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - self.loadings_array = self.process_loadings() + if num_overlap < self.lowest_overlap: + self.lowest_overlap = num_overlap + if num_overlap < 2: + corr_result_row.append([target_trait, 0, num_overlap]) + pca_corr_result_row.append(0) else: - self.pca_works = "False" - except: - self.pca_works = "False" + pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + if is_spearman == False: + sample_r, sample_p = pearson_r, pearson_p + if sample_r == 1: + is_spearman = True + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + corr_result_row.append([target_trait, sample_r, num_overlap]) + pca_corr_result_row.append(pearson_r) - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) - # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) + self.corr_results.append(corr_result_row) + self.pca_corr_results.append(pca_corr_result_row) + + self.trait_data_array = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + this_trait_vals = [] + for index, sample in enumerate(this_db_samples): + if (sample in this_sample_data) and (sample in self.shared_samples_list): + sample_value = this_sample_data[sample].value + this_trait_vals.append(sample_value) + self.trait_data_array.append(this_trait_vals) + + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + + groups = [] + for sample in self.all_sample_list: + groups.append(1) + + try: + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" + + self.js_data = dict(traits = [trait.name for trait in self.traits], + groups = groups, + cols = list(range(len(self.traits))), + rows = list(range(len(self.traits))), + samples = self.all_sample_list, + sample_data = self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') |