Removed lines that check that all traits are part of the same group,

since it might be the case in the future that different groups still share sample names (and it's not really necessary since we check how many samples are shared between each individual pair of traits)
author: zsloan 2020-11-19 14:38:04 -0600
committer: zsloan 2020-11-19 14:38:04 -0600
commit: 6cc806e65bee5652bbe761c10079017a5b44a160 (patch)
tree: 1c3d78c2bdc6c0280b7dc001fe6f7acbb2413e6e /wqflask
parent: 6ed037083f0b2bac95021e5fb00c0c8877422a47 (diff)
download: genenetwork2-6cc806e65bee5652bbe761c10079017a5b44a160.tar.gz
1 files changed, 101 insertions, 109 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index a394f548..0269ce68 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -55,11 +55,7 @@ class CorrelationMatrix(object):
         self.do_PCA = True
         this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop
         for trait_db in self.trait_list:
-            if trait_db[1].group.name != this_group:
-                self.insufficient_shared_samples = True
-                break
-            else:
-                this_group = trait_db[1].group.name
+            this_group = trait_db[1].group.name
             this_trait = trait_db[0]
             self.traits.append(this_trait)
             this_sample_data = this_trait.data
@@ -68,119 +64,115 @@ class CorrelationMatrix(object):
                 if sample not in self.all_sample_list:
                     self.all_sample_list.append(sample)
 
-        if self.insufficient_shared_samples:
-            pass
-        else:
-            self.sample_data = []
-            for trait_db in self.trait_list:
-                this_trait = trait_db[0]
-                this_sample_data = this_trait.data
+        self.sample_data = []
+        for trait_db in self.trait_list:
+            this_trait = trait_db[0]
+            this_sample_data = this_trait.data
 
-                this_trait_vals = []
-                for sample in self.all_sample_list:
-                    if sample in this_sample_data:
-                        this_trait_vals.append(this_sample_data[sample].value)
-                    else:
-                        this_trait_vals.append('')
-                self.sample_data.append(this_trait_vals)
-
-            if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples
-                self.do_PCA = False
-
-            self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
-
-            self.corr_results = []
-            self.pca_corr_results = []
-            self.shared_samples_list = self.all_sample_list
-            for trait_db in self.trait_list:
-                this_trait = trait_db[0]
-                this_db = trait_db[1]
-
-                this_db_samples = this_db.group.all_samples_ordered()
-                this_sample_data = this_trait.data
-
-                corr_result_row = []
-                pca_corr_result_row = []
-                is_spearman = False #ZS: To determine if it's above or below the diagonal
-                for target in self.trait_list:
-                    target_trait = target[0]
-                    target_db = target[1]
-                    target_samples = target_db.group.all_samples_ordered()
-                    target_sample_data = target_trait.data
-
-                    this_trait_vals = []
-                    target_vals = []
-                    for index, sample in enumerate(target_samples):
-                        if (sample in this_sample_data) and (sample in target_sample_data):
-                            sample_value = this_sample_data[sample].value
-                            target_sample_value = target_sample_data[sample].value
-                            this_trait_vals.append(sample_value)
-                            target_vals.append(target_sample_value)
-                        else:
-                            if sample in self.shared_samples_list:
-                                self.shared_samples_list.remove(sample)
-
-                    this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
-
-                    if num_overlap < self.lowest_overlap:
-                        self.lowest_overlap = num_overlap
-                    if num_overlap < 2:
-                        corr_result_row.append([target_trait, 0, num_overlap])
-                        pca_corr_result_row.append(0)
-                    else:
-                        pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
-                        if is_spearman == False:
-                            sample_r, sample_p = pearson_r, pearson_p
-                            if sample_r == 1:
-                                is_spearman = True
-                        else:
-                            sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
-
-                        corr_result_row.append([target_trait, sample_r, num_overlap])
-                        pca_corr_result_row.append(pearson_r)
-
-                self.corr_results.append(corr_result_row)
-                self.pca_corr_results.append(pca_corr_result_row)
-
-            self.trait_data_array = []
-            for trait_db in self.trait_list:
-                this_trait = trait_db[0]
-                this_db = trait_db[1]
-                this_db_samples = this_db.group.all_samples_ordered()
-                this_sample_data = this_trait.data
+            this_trait_vals = []
+            for sample in self.all_sample_list:
+                if sample in this_sample_data:
+                    this_trait_vals.append(this_sample_data[sample].value)
+                else:
+                    this_trait_vals.append('')
+            self.sample_data.append(this_trait_vals)
+
+        if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples
+            self.do_PCA = False
+
+        self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
+
+        self.corr_results = []
+        self.pca_corr_results = []
+        self.shared_samples_list = self.all_sample_list
+        for trait_db in self.trait_list:
+            this_trait = trait_db[0]
+            this_db = trait_db[1]
+
+            this_db_samples = this_db.group.all_samples_ordered()
+            this_sample_data = this_trait.data
+
+            corr_result_row = []
+            pca_corr_result_row = []
+            is_spearman = False #ZS: To determine if it's above or below the diagonal
+            for target in self.trait_list:
+                target_trait = target[0]
+                target_db = target[1]
+                target_samples = target_db.group.all_samples_ordered()
+                target_sample_data = target_trait.data
 
                 this_trait_vals = []
-                for index, sample in enumerate(this_db_samples):
-                    if (sample in this_sample_data) and (sample in self.shared_samples_list):
+                target_vals = []
+                for index, sample in enumerate(target_samples):
+                    if (sample in this_sample_data) and (sample in target_sample_data):
                         sample_value = this_sample_data[sample].value
+                        target_sample_value = target_sample_data[sample].value
                         this_trait_vals.append(sample_value)
-                self.trait_data_array.append(this_trait_vals)
+                        target_vals.append(target_sample_value)
+                    else:
+                        if sample in self.shared_samples_list:
+                            self.shared_samples_list.remove(sample)
 
-            corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
-            corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
+                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
 
-            groups = []
-            for sample in self.all_sample_list:
-                groups.append(1)
-
-            try:
-                if self.do_PCA == True:
-                    self.pca_works = "True"
-                    self.pca_trait_ids = []
-                    pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
-                    self.loadings_array = self.process_loadings()
+                if num_overlap < self.lowest_overlap:
+                    self.lowest_overlap = num_overlap
+                if num_overlap < 2:
+                    corr_result_row.append([target_trait, 0, num_overlap])
+                    pca_corr_result_row.append(0)
                 else:
-                    self.pca_works = "False"
-            except:
-                self.pca_works = "False"
+                    pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
+                    if is_spearman == False:
+                        sample_r, sample_p = pearson_r, pearson_p
+                        if sample_r == 1:
+                            is_spearman = True
+                    else:
+                        sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
+
+                    corr_result_row.append([target_trait, sample_r, num_overlap])
+                    pca_corr_result_row.append(pearson_r)
 
-            self.js_data = dict(traits = [trait.name for trait in self.traits],
-                                groups = groups,
-                                cols = list(range(len(self.traits))),
-                                rows = list(range(len(self.traits))),
-                                samples = self.all_sample_list,
-                                sample_data = self.sample_data,)
-            #                    corr_results = [result[1] for result in result_row for result_row in self.corr_results])
+            self.corr_results.append(corr_result_row)
+            self.pca_corr_results.append(pca_corr_result_row)
+
+        self.trait_data_array = []
+        for trait_db in self.trait_list:
+            this_trait = trait_db[0]
+            this_db = trait_db[1]
+            this_db_samples = this_db.group.all_samples_ordered()
+            this_sample_data = this_trait.data
+
+            this_trait_vals = []
+            for index, sample in enumerate(this_db_samples):
+                if (sample in this_sample_data) and (sample in self.shared_samples_list):
+                    sample_value = this_sample_data[sample].value
+                    this_trait_vals.append(sample_value)
+            self.trait_data_array.append(this_trait_vals)
+
+        corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
+        corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
+
+        groups = []
+        for sample in self.all_sample_list:
+            groups.append(1)
+
+        try:
+            if self.do_PCA == True:
+                self.pca_works = "True"
+                self.pca_trait_ids = []
+                pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
+                self.loadings_array = self.process_loadings()
+            else:
+                self.pca_works = "False"
+        except:
+            self.pca_works = "False"
+
+        self.js_data = dict(traits = [trait.name for trait in self.traits],
+                            groups = groups,
+                            cols = list(range(len(self.traits))),
+                            rows = list(range(len(self.traits))),
+                            samples = self.all_sample_list,
+                            sample_data = self.sample_data,)
 
     def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
         base = importr('base')
author	zsloan	2020-11-19 14:38:04 -0600
committer	zsloan	2020-11-19 14:38:04 -0600
commit	6cc806e65bee5652bbe761c10079017a5b44a160 (patch)
tree	1c3d78c2bdc6c0280b7dc001fe6f7acbb2413e6e /wqflask
parent	6ed037083f0b2bac95021e5fb00c0c8877422a47 (diff)
download	genenetwork2-6cc806e65bee5652bbe761c10079017a5b44a160.tar.gz