diff options
author | zsloan | 2020-11-19 14:38:04 -0600 |
---|---|---|
committer | zsloan | 2020-11-19 14:38:04 -0600 |
commit | 6cc806e65bee5652bbe761c10079017a5b44a160 (patch) | |
tree | 1c3d78c2bdc6c0280b7dc001fe6f7acbb2413e6e | |
parent | 6ed037083f0b2bac95021e5fb00c0c8877422a47 (diff) | |
download | genenetwork2-6cc806e65bee5652bbe761c10079017a5b44a160.tar.gz |
Removed lines that check that all traits are part of the same group,
since it might be the case in the future that different groups still
share sample names (and it's not really necessary since we check how
many samples are shraed between each individual pair of traits)
-rw-r--r-- | wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 210 |
1 files changed, 101 insertions, 109 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index a394f548..0269ce68 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -55,11 +55,7 @@ class CorrelationMatrix(object): self.do_PCA = True this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop for trait_db in self.trait_list: - if trait_db[1].group.name != this_group: - self.insufficient_shared_samples = True - break - else: - this_group = trait_db[1].group.name + this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) this_sample_data = this_trait.data @@ -68,119 +64,115 @@ class CorrelationMatrix(object): if sample not in self.all_sample_list: self.all_sample_list.append(sample) - if self.insufficient_shared_samples: - pass - else: - self.sample_data = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_sample_data = this_trait.data + self.sample_data = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_sample_data = this_trait.data - this_trait_vals = [] - for sample in self.all_sample_list: - if sample in this_sample_data: - this_trait_vals.append(this_sample_data[sample].value) - else: - this_trait_vals.append('') - self.sample_data.append(this_trait_vals) - - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples - self.do_PCA = False - - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) - - self.corr_results = [] - self.pca_corr_results = [] - self.shared_samples_list = self.all_sample_list - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data - - corr_result_row = [] - pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal - for target in self.trait_list: - target_trait = target[0] - target_db = target[1] - target_samples = target_db.group.all_samples_ordered() - target_sample_data = target_trait.data - - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(target_samples): - if (sample in this_sample_data) and (sample in target_sample_data): - sample_value = this_sample_data[sample].value - target_sample_value = target_sample_data[sample].value - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - else: - if sample in self.shared_samples_list: - self.shared_samples_list.remove(sample) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - - if num_overlap < self.lowest_overlap: - self.lowest_overlap = num_overlap - if num_overlap < 2: - corr_result_row.append([target_trait, 0, num_overlap]) - pca_corr_result_row.append(0) - else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - if is_spearman == False: - sample_r, sample_p = pearson_r, pearson_p - if sample_r == 1: - is_spearman = True - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - corr_result_row.append([target_trait, sample_r, num_overlap]) - pca_corr_result_row.append(pearson_r) - - self.corr_results.append(corr_result_row) - self.pca_corr_results.append(pca_corr_result_row) - - self.trait_data_array = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data + this_trait_vals = [] + for sample in self.all_sample_list: + if sample in this_sample_data: + this_trait_vals.append(this_sample_data[sample].value) + else: + this_trait_vals.append('') + self.sample_data.append(this_trait_vals) + + if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + self.do_PCA = False + + self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + + self.corr_results = [] + self.pca_corr_results = [] + self.shared_samples_list = self.all_sample_list + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + corr_result_row = [] + pca_corr_result_row = [] + is_spearman = False #ZS: To determine if it's above or below the diagonal + for target in self.trait_list: + target_trait = target[0] + target_db = target[1] + target_samples = target_db.group.all_samples_ordered() + target_sample_data = target_trait.data this_trait_vals = [] - for index, sample in enumerate(this_db_samples): - if (sample in this_sample_data) and (sample in self.shared_samples_list): + target_vals = [] + for index, sample in enumerate(target_samples): + if (sample in this_sample_data) and (sample in target_sample_data): sample_value = this_sample_data[sample].value + target_sample_value = target_sample_data[sample].value this_trait_vals.append(sample_value) - self.trait_data_array.append(this_trait_vals) + target_vals.append(target_sample_value) + else: + if sample in self.shared_samples_list: + self.shared_samples_list.remove(sample) - corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - groups = [] - for sample in self.all_sample_list: - groups.append(1) - - try: - if self.do_PCA == True: - self.pca_works = "True" - self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - self.loadings_array = self.process_loadings() + if num_overlap < self.lowest_overlap: + self.lowest_overlap = num_overlap + if num_overlap < 2: + corr_result_row.append([target_trait, 0, num_overlap]) + pca_corr_result_row.append(0) else: - self.pca_works = "False" - except: - self.pca_works = "False" + pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + if is_spearman == False: + sample_r, sample_p = pearson_r, pearson_p + if sample_r == 1: + is_spearman = True + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + corr_result_row.append([target_trait, sample_r, num_overlap]) + pca_corr_result_row.append(pearson_r) - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) - # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) + self.corr_results.append(corr_result_row) + self.pca_corr_results.append(pca_corr_result_row) + + self.trait_data_array = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + this_trait_vals = [] + for index, sample in enumerate(this_db_samples): + if (sample in this_sample_data) and (sample in self.shared_samples_list): + sample_value = this_sample_data[sample].value + this_trait_vals.append(sample_value) + self.trait_data_array.append(this_trait_vals) + + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + + groups = [] + for sample in self.all_sample_list: + groups.append(1) + + try: + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" + + self.js_data = dict(traits = [trait.name for trait in self.traits], + groups = groups, + cols = list(range(len(self.traits))), + rows = list(range(len(self.traits))), + samples = self.all_sample_list, + sample_data = self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') |