From 6cc806e65bee5652bbe761c10079017a5b44a160 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 19 Nov 2020 14:38:04 -0600 Subject: Removed lines that check that all traits are part of the same group, since it might be the case in the future that different groups still share sample names (and it's not really necessary since we check how many samples are shraed between each individual pair of traits) --- .../wqflask/correlation_matrix/show_corr_matrix.py | 210 ++++++++++----------- 1 file changed, 101 insertions(+), 109 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index a394f548..0269ce68 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -55,11 +55,7 @@ class CorrelationMatrix(object): self.do_PCA = True this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop for trait_db in self.trait_list: - if trait_db[1].group.name != this_group: - self.insufficient_shared_samples = True - break - else: - this_group = trait_db[1].group.name + this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) this_sample_data = this_trait.data @@ -68,119 +64,115 @@ class CorrelationMatrix(object): if sample not in self.all_sample_list: self.all_sample_list.append(sample) - if self.insufficient_shared_samples: - pass - else: - self.sample_data = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_sample_data = this_trait.data + self.sample_data = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_sample_data = this_trait.data - this_trait_vals = [] - for sample in self.all_sample_list: - if sample in this_sample_data: - this_trait_vals.append(this_sample_data[sample].value) - else: - this_trait_vals.append('') - self.sample_data.append(this_trait_vals) - - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples - self.do_PCA = False - - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) - - self.corr_results = [] - self.pca_corr_results = [] - self.shared_samples_list = self.all_sample_list - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data - - corr_result_row = [] - pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal - for target in self.trait_list: - target_trait = target[0] - target_db = target[1] - target_samples = target_db.group.all_samples_ordered() - target_sample_data = target_trait.data - - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(target_samples): - if (sample in this_sample_data) and (sample in target_sample_data): - sample_value = this_sample_data[sample].value - target_sample_value = target_sample_data[sample].value - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - else: - if sample in self.shared_samples_list: - self.shared_samples_list.remove(sample) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - - if num_overlap < self.lowest_overlap: - self.lowest_overlap = num_overlap - if num_overlap < 2: - corr_result_row.append([target_trait, 0, num_overlap]) - pca_corr_result_row.append(0) - else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - if is_spearman == False: - sample_r, sample_p = pearson_r, pearson_p - if sample_r == 1: - is_spearman = True - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - corr_result_row.append([target_trait, sample_r, num_overlap]) - pca_corr_result_row.append(pearson_r) - - self.corr_results.append(corr_result_row) - self.pca_corr_results.append(pca_corr_result_row) - - self.trait_data_array = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data + this_trait_vals = [] + for sample in self.all_sample_list: + if sample in this_sample_data: + this_trait_vals.append(this_sample_data[sample].value) + else: + this_trait_vals.append('') + self.sample_data.append(this_trait_vals) + + if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + self.do_PCA = False + + self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + + self.corr_results = [] + self.pca_corr_results = [] + self.shared_samples_list = self.all_sample_list + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + corr_result_row = [] + pca_corr_result_row = [] + is_spearman = False #ZS: To determine if it's above or below the diagonal + for target in self.trait_list: + target_trait = target[0] + target_db = target[1] + target_samples = target_db.group.all_samples_ordered() + target_sample_data = target_trait.data this_trait_vals = [] - for index, sample in enumerate(this_db_samples): - if (sample in this_sample_data) and (sample in self.shared_samples_list): + target_vals = [] + for index, sample in enumerate(target_samples): + if (sample in this_sample_data) and (sample in target_sample_data): sample_value = this_sample_data[sample].value + target_sample_value = target_sample_data[sample].value this_trait_vals.append(sample_value) - self.trait_data_array.append(this_trait_vals) + target_vals.append(target_sample_value) + else: + if sample in self.shared_samples_list: + self.shared_samples_list.remove(sample) - corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - groups = [] - for sample in self.all_sample_list: - groups.append(1) - - try: - if self.do_PCA == True: - self.pca_works = "True" - self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - self.loadings_array = self.process_loadings() + if num_overlap < self.lowest_overlap: + self.lowest_overlap = num_overlap + if num_overlap < 2: + corr_result_row.append([target_trait, 0, num_overlap]) + pca_corr_result_row.append(0) else: - self.pca_works = "False" - except: - self.pca_works = "False" + pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + if is_spearman == False: + sample_r, sample_p = pearson_r, pearson_p + if sample_r == 1: + is_spearman = True + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + corr_result_row.append([target_trait, sample_r, num_overlap]) + pca_corr_result_row.append(pearson_r) - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) - # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) + self.corr_results.append(corr_result_row) + self.pca_corr_results.append(pca_corr_result_row) + + self.trait_data_array = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + this_trait_vals = [] + for index, sample in enumerate(this_db_samples): + if (sample in this_sample_data) and (sample in self.shared_samples_list): + sample_value = this_sample_data[sample].value + this_trait_vals.append(sample_value) + self.trait_data_array.append(this_trait_vals) + + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + + groups = [] + for sample in self.all_sample_list: + groups.append(1) + + try: + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" + + self.js_data = dict(traits = [trait.name for trait in self.traits], + groups = groups, + cols = list(range(len(self.traits))), + rows = list(range(len(self.traits))), + samples = self.all_sample_list, + sample_data = self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') -- cgit v1.2.3