aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2020-11-19 14:38:04 -0600
committerzsloan2020-11-19 14:38:04 -0600
commit6cc806e65bee5652bbe761c10079017a5b44a160 (patch)
tree1c3d78c2bdc6c0280b7dc001fe6f7acbb2413e6e
parent6ed037083f0b2bac95021e5fb00c0c8877422a47 (diff)
downloadgenenetwork2-6cc806e65bee5652bbe761c10079017a5b44a160.tar.gz
Removed lines that check that all traits are part of the same group,
since it might be the case in the future that different groups still share sample names (and it's not really necessary since we check how many samples are shared between each individual pair of traits)
-rw-r--r--wqflask/wqflask/correlation_matrix/show_corr_matrix.py210
1 files changed, 101 insertions, 109 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index a394f548..0269ce68 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -55,11 +55,7 @@ class CorrelationMatrix(object):
self.do_PCA = True
this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop
for trait_db in self.trait_list:
- if trait_db[1].group.name != this_group:
- self.insufficient_shared_samples = True
- break
- else:
- this_group = trait_db[1].group.name
+ this_group = trait_db[1].group.name
this_trait = trait_db[0]
self.traits.append(this_trait)
this_sample_data = this_trait.data
@@ -68,119 +64,115 @@ class CorrelationMatrix(object):
if sample not in self.all_sample_list:
self.all_sample_list.append(sample)
- if self.insufficient_shared_samples:
- pass
- else:
- self.sample_data = []
- for trait_db in self.trait_list:
- this_trait = trait_db[0]
- this_sample_data = this_trait.data
+ self.sample_data = []
+ for trait_db in self.trait_list:
+ this_trait = trait_db[0]
+ this_sample_data = this_trait.data
- this_trait_vals = []
- for sample in self.all_sample_list:
- if sample in this_sample_data:
- this_trait_vals.append(this_sample_data[sample].value)
- else:
- this_trait_vals.append('')
- self.sample_data.append(this_trait_vals)
-
- if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples
- self.do_PCA = False
-
- self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
-
- self.corr_results = []
- self.pca_corr_results = []
- self.shared_samples_list = self.all_sample_list
- for trait_db in self.trait_list:
- this_trait = trait_db[0]
- this_db = trait_db[1]
-
- this_db_samples = this_db.group.all_samples_ordered()
- this_sample_data = this_trait.data
-
- corr_result_row = []
- pca_corr_result_row = []
- is_spearman = False #ZS: To determine if it's above or below the diagonal
- for target in self.trait_list:
- target_trait = target[0]
- target_db = target[1]
- target_samples = target_db.group.all_samples_ordered()
- target_sample_data = target_trait.data
-
- this_trait_vals = []
- target_vals = []
- for index, sample in enumerate(target_samples):
- if (sample in this_sample_data) and (sample in target_sample_data):
- sample_value = this_sample_data[sample].value
- target_sample_value = target_sample_data[sample].value
- this_trait_vals.append(sample_value)
- target_vals.append(target_sample_value)
- else:
- if sample in self.shared_samples_list:
- self.shared_samples_list.remove(sample)
-
- this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
-
- if num_overlap < self.lowest_overlap:
- self.lowest_overlap = num_overlap
- if num_overlap < 2:
- corr_result_row.append([target_trait, 0, num_overlap])
- pca_corr_result_row.append(0)
- else:
- pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
- if is_spearman == False:
- sample_r, sample_p = pearson_r, pearson_p
- if sample_r == 1:
- is_spearman = True
- else:
- sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
-
- corr_result_row.append([target_trait, sample_r, num_overlap])
- pca_corr_result_row.append(pearson_r)
-
- self.corr_results.append(corr_result_row)
- self.pca_corr_results.append(pca_corr_result_row)
-
- self.trait_data_array = []
- for trait_db in self.trait_list:
- this_trait = trait_db[0]
- this_db = trait_db[1]
- this_db_samples = this_db.group.all_samples_ordered()
- this_sample_data = this_trait.data
+ this_trait_vals = []
+ for sample in self.all_sample_list:
+ if sample in this_sample_data:
+ this_trait_vals.append(this_sample_data[sample].value)
+ else:
+ this_trait_vals.append('')
+ self.sample_data.append(this_trait_vals)
+
+ if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples
+ self.do_PCA = False
+
+ self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
+
+ self.corr_results = []
+ self.pca_corr_results = []
+ self.shared_samples_list = self.all_sample_list
+ for trait_db in self.trait_list:
+ this_trait = trait_db[0]
+ this_db = trait_db[1]
+
+ this_db_samples = this_db.group.all_samples_ordered()
+ this_sample_data = this_trait.data
+
+ corr_result_row = []
+ pca_corr_result_row = []
+ is_spearman = False #ZS: To determine if it's above or below the diagonal
+ for target in self.trait_list:
+ target_trait = target[0]
+ target_db = target[1]
+ target_samples = target_db.group.all_samples_ordered()
+ target_sample_data = target_trait.data
this_trait_vals = []
- for index, sample in enumerate(this_db_samples):
- if (sample in this_sample_data) and (sample in self.shared_samples_list):
+ target_vals = []
+ for index, sample in enumerate(target_samples):
+ if (sample in this_sample_data) and (sample in target_sample_data):
sample_value = this_sample_data[sample].value
+ target_sample_value = target_sample_data[sample].value
this_trait_vals.append(sample_value)
- self.trait_data_array.append(this_trait_vals)
+ target_vals.append(target_sample_value)
+ else:
+ if sample in self.shared_samples_list:
+ self.shared_samples_list.remove(sample)
- corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
- corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
+ this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
- groups = []
- for sample in self.all_sample_list:
- groups.append(1)
-
- try:
- if self.do_PCA == True:
- self.pca_works = "True"
- self.pca_trait_ids = []
- pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
- self.loadings_array = self.process_loadings()
+ if num_overlap < self.lowest_overlap:
+ self.lowest_overlap = num_overlap
+ if num_overlap < 2:
+ corr_result_row.append([target_trait, 0, num_overlap])
+ pca_corr_result_row.append(0)
else:
- self.pca_works = "False"
- except:
- self.pca_works = "False"
+ pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
+ if is_spearman == False:
+ sample_r, sample_p = pearson_r, pearson_p
+ if sample_r == 1:
+ is_spearman = True
+ else:
+ sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
+
+ corr_result_row.append([target_trait, sample_r, num_overlap])
+ pca_corr_result_row.append(pearson_r)
- self.js_data = dict(traits = [trait.name for trait in self.traits],
- groups = groups,
- cols = list(range(len(self.traits))),
- rows = list(range(len(self.traits))),
- samples = self.all_sample_list,
- sample_data = self.sample_data,)
- # corr_results = [result[1] for result in result_row for result_row in self.corr_results])
+ self.corr_results.append(corr_result_row)
+ self.pca_corr_results.append(pca_corr_result_row)
+
+ self.trait_data_array = []
+ for trait_db in self.trait_list:
+ this_trait = trait_db[0]
+ this_db = trait_db[1]
+ this_db_samples = this_db.group.all_samples_ordered()
+ this_sample_data = this_trait.data
+
+ this_trait_vals = []
+ for index, sample in enumerate(this_db_samples):
+ if (sample in this_sample_data) and (sample in self.shared_samples_list):
+ sample_value = this_sample_data[sample].value
+ this_trait_vals.append(sample_value)
+ self.trait_data_array.append(this_trait_vals)
+
+ corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
+ corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
+
+ groups = []
+ for sample in self.all_sample_list:
+ groups.append(1)
+
+ try:
+ if self.do_PCA == True:
+ self.pca_works = "True"
+ self.pca_trait_ids = []
+ pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
+ self.loadings_array = self.process_loadings()
+ else:
+ self.pca_works = "False"
+ except:
+ self.pca_works = "False"
+
+ self.js_data = dict(traits = [trait.name for trait in self.traits],
+ groups = groups,
+ cols = list(range(len(self.traits))),
+ rows = list(range(len(self.traits))),
+ samples = self.all_sample_list,
+ sample_data = self.sample_data,)
def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
base = importr('base')