diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 210 | ||||
-rw-r--r-- | wqflask/wqflask/templates/correlation_matrix.html | 4 |
2 files changed, 105 insertions, 109 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index a394f548..0269ce68 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -55,11 +55,7 @@ class CorrelationMatrix(object): self.do_PCA = True this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop for trait_db in self.trait_list: - if trait_db[1].group.name != this_group: - self.insufficient_shared_samples = True - break - else: - this_group = trait_db[1].group.name + this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) this_sample_data = this_trait.data @@ -68,119 +64,115 @@ class CorrelationMatrix(object): if sample not in self.all_sample_list: self.all_sample_list.append(sample) - if self.insufficient_shared_samples: - pass - else: - self.sample_data = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_sample_data = this_trait.data + self.sample_data = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_sample_data = this_trait.data - this_trait_vals = [] - for sample in self.all_sample_list: - if sample in this_sample_data: - this_trait_vals.append(this_sample_data[sample].value) - else: - this_trait_vals.append('') - self.sample_data.append(this_trait_vals) - - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples - self.do_PCA = False - - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) - - self.corr_results = [] - self.pca_corr_results = [] - self.shared_samples_list = self.all_sample_list - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data - - corr_result_row = [] - pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal - for target in self.trait_list: - target_trait = target[0] - target_db = target[1] - target_samples = target_db.group.all_samples_ordered() - target_sample_data = target_trait.data - - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(target_samples): - if (sample in this_sample_data) and (sample in target_sample_data): - sample_value = this_sample_data[sample].value - target_sample_value = target_sample_data[sample].value - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - else: - if sample in self.shared_samples_list: - self.shared_samples_list.remove(sample) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - - if num_overlap < self.lowest_overlap: - self.lowest_overlap = num_overlap - if num_overlap < 2: - corr_result_row.append([target_trait, 0, num_overlap]) - pca_corr_result_row.append(0) - else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - if is_spearman == False: - sample_r, sample_p = pearson_r, pearson_p - if sample_r == 1: - is_spearman = True - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - corr_result_row.append([target_trait, sample_r, num_overlap]) - pca_corr_result_row.append(pearson_r) - - self.corr_results.append(corr_result_row) - self.pca_corr_results.append(pca_corr_result_row) - - self.trait_data_array = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_db = trait_db[1] - this_db_samples = this_db.group.all_samples_ordered() - this_sample_data = this_trait.data + this_trait_vals = [] + for sample in self.all_sample_list: + if sample in this_sample_data: + this_trait_vals.append(this_sample_data[sample].value) + else: + this_trait_vals.append('') + self.sample_data.append(this_trait_vals) + + if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + self.do_PCA = False + + self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + + self.corr_results = [] + self.pca_corr_results = [] + self.shared_samples_list = self.all_sample_list + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + corr_result_row = [] + pca_corr_result_row = [] + is_spearman = False #ZS: To determine if it's above or below the diagonal + for target in self.trait_list: + target_trait = target[0] + target_db = target[1] + target_samples = target_db.group.all_samples_ordered() + target_sample_data = target_trait.data this_trait_vals = [] - for index, sample in enumerate(this_db_samples): - if (sample in this_sample_data) and (sample in self.shared_samples_list): + target_vals = [] + for index, sample in enumerate(target_samples): + if (sample in this_sample_data) and (sample in target_sample_data): sample_value = this_sample_data[sample].value + target_sample_value = target_sample_data[sample].value this_trait_vals.append(sample_value) - self.trait_data_array.append(this_trait_vals) + target_vals.append(target_sample_value) + else: + if sample in self.shared_samples_list: + self.shared_samples_list.remove(sample) - corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) - groups = [] - for sample in self.all_sample_list: - groups.append(1) - - try: - if self.do_PCA == True: - self.pca_works = "True" - self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - self.loadings_array = self.process_loadings() + if num_overlap < self.lowest_overlap: + self.lowest_overlap = num_overlap + if num_overlap < 2: + corr_result_row.append([target_trait, 0, num_overlap]) + pca_corr_result_row.append(0) else: - self.pca_works = "False" - except: - self.pca_works = "False" + pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + if is_spearman == False: + sample_r, sample_p = pearson_r, pearson_p + if sample_r == 1: + is_spearman = True + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + corr_result_row.append([target_trait, sample_r, num_overlap]) + pca_corr_result_row.append(pearson_r) - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) - # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) + self.corr_results.append(corr_result_row) + self.pca_corr_results.append(pca_corr_result_row) + + self.trait_data_array = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_db = trait_db[1] + this_db_samples = this_db.group.all_samples_ordered() + this_sample_data = this_trait.data + + this_trait_vals = [] + for index, sample in enumerate(this_db_samples): + if (sample in this_sample_data) and (sample in self.shared_samples_list): + sample_value = this_sample_data[sample].value + this_trait_vals.append(sample_value) + self.trait_data_array.append(this_trait_vals) + + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + + groups = [] + for sample in self.all_sample_list: + groups.append(1) + + try: + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" + + self.js_data = dict(traits = [trait.name for trait in self.traits], + groups = groups, + cols = list(range(len(self.traits))), + rows = list(range(len(self.traits))), + samples = self.all_sample_list, + sample_data = self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') diff --git a/wqflask/wqflask/templates/correlation_matrix.html b/wqflask/wqflask/templates/correlation_matrix.html index d556f31a..4e150618 100644 --- a/wqflask/wqflask/templates/correlation_matrix.html +++ b/wqflask/wqflask/templates/correlation_matrix.html @@ -51,8 +51,12 @@ {% if result[0].name == trait.name and result[0].dataset == trait.dataset %} <td nowrap="ON" align="center" bgcolor="#cccccc" style="padding: 3px; line-height: 1.1;"><a href="/show_trait?trait_id={{ trait.name }}&dataset={{ trait.dataset.name }}"><font style="font-size: 12px; color: #3071a9; font-weight: bold;" ><em>n</em><br>{{ result[2] }}</font></a></td> {% else %} + {% if result[1] == 0 %} + <td nowrap="ON" align="middle" bgcolor="#eeeeee" style="padding: 3px; line-height: 1.1;">N/A</td> + {% else %} <td nowrap="ON" align="middle" class="corr_cell" style="padding: 3px; line-height: 1.1;"><a href="/corr_scatter_plot?dataset_1={% if trait.dataset.name == 'Temp' %}Temp_{{ trait.dataset.group.name }}{% else %}{{ trait.dataset.name }}{% endif %}&dataset_2={% if result[0].dataset.name == 'Temp' %}Temp_{{ result[0].dataset.group.name }}{% else %}{{ result[0].dataset.name }}{% endif %}&trait_1={{ trait.name }}&trait_2={{ result[0].name }}"><font style="font-size: 12px; color: #3071a9; font-weight: bold;" ><span class="corr_value">{{ '%0.2f' % result[1] }}</span><br>{{ result[2] }}</font></a></td> {% endif %} + {% endif %} {% endfor %} </tr> {% endfor %} |