From 6cc806e65bee5652bbe761c10079017a5b44a160 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 19 Nov 2020 14:38:04 -0600
Subject: Removed lines that check that all traits are part of the same group,
 since it might be the case in the future that different groups still share
 sample names (and it's not really necessary since we check how many samples
 are shared between each individual pair of traits)

---
 .../wqflask/correlation_matrix/show_corr_matrix.py | 210 ++++++++++-----------
 1 file changed, 101 insertions(+), 109 deletions(-)

diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index a394f548..0269ce68 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -55,11 +55,7 @@ class CorrelationMatrix(object):
         self.do_PCA = True
         this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop
         for trait_db in self.trait_list:
-            if trait_db[1].group.name != this_group:
-                self.insufficient_shared_samples = True
-                break
-            else:
-                this_group = trait_db[1].group.name
+            this_group = trait_db[1].group.name
             this_trait = trait_db[0]
             self.traits.append(this_trait)
             this_sample_data = this_trait.data
@@ -68,119 +64,115 @@ class CorrelationMatrix(object):
                 if sample not in self.all_sample_list:
                     self.all_sample_list.append(sample)
 
-        if self.insufficient_shared_samples:
-            pass
-        else:
-            self.sample_data = []
-            for trait_db in self.trait_list:
-                this_trait = trait_db[0]
-                this_sample_data = this_trait.data
+        self.sample_data = []
+        for trait_db in self.trait_list:
+            this_trait = trait_db[0]
+            this_sample_data = this_trait.data
 
-                this_trait_vals = []
-                for sample in self.all_sample_list:
-                    if sample in this_sample_data:
-                        this_trait_vals.append(this_sample_data[sample].value)
-                    else:
-                        this_trait_vals.append('')
-                self.sample_data.append(this_trait_vals)
-
-            if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples
-                self.do_PCA = False
-
-            self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
-
-            self.corr_results = []
-            self.pca_corr_results = []
-            self.shared_samples_list = self.all_sample_list
-            for trait_db in self.trait_list:
-                this_trait = trait_db[0]
-                this_db = trait_db[1]
-
-                this_db_samples = this_db.group.all_samples_ordered()
-                this_sample_data = this_trait.data
-
-                corr_result_row = []
-                pca_corr_result_row = []
-                is_spearman = False #ZS: To determine if it's above or below the diagonal
-                for target in self.trait_list:
-                    target_trait = target[0]
-                    target_db = target[1]
-                    target_samples = target_db.group.all_samples_ordered()
-                    target_sample_data = target_trait.data
-
-                    this_trait_vals = []
-                    target_vals = []
-                    for index, sample in enumerate(target_samples):
-                        if (sample in this_sample_data) and (sample in target_sample_data):
-                            sample_value = this_sample_data[sample].value
-                            target_sample_value = target_sample_data[sample].value
-                            this_trait_vals.append(sample_value)
-                            target_vals.append(target_sample_value)
-                        else:
-                            if sample in self.shared_samples_list:
-                                self.shared_samples_list.remove(sample)
-
-                    this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
-
-                    if num_overlap < self.lowest_overlap:
-                        self.lowest_overlap = num_overlap
-                    if num_overlap < 2:
-                        corr_result_row.append([target_trait, 0, num_overlap])
-                        pca_corr_result_row.append(0)
-                    else:
-                        pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
-                        if is_spearman == False:
-                            sample_r, sample_p = pearson_r, pearson_p
-                            if sample_r == 1:
-                                is_spearman = True
-                        else:
-                            sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
-
-                        corr_result_row.append([target_trait, sample_r, num_overlap])
-                        pca_corr_result_row.append(pearson_r)
-
-                self.corr_results.append(corr_result_row)
-                self.pca_corr_results.append(pca_corr_result_row)
-
-            self.trait_data_array = []
-            for trait_db in self.trait_list:
-                this_trait = trait_db[0]
-                this_db = trait_db[1]
-                this_db_samples = this_db.group.all_samples_ordered()
-                this_sample_data = this_trait.data
+            this_trait_vals = []
+            for sample in self.all_sample_list:
+                if sample in this_sample_data:
+                    this_trait_vals.append(this_sample_data[sample].value)
+                else:
+                    this_trait_vals.append('')
+            self.sample_data.append(this_trait_vals)
+
+        if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples
+            self.do_PCA = False
+
+        self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning)
+
+        self.corr_results = []
+        self.pca_corr_results = []
+        self.shared_samples_list = self.all_sample_list
+        for trait_db in self.trait_list:
+            this_trait = trait_db[0]
+            this_db = trait_db[1]
+
+            this_db_samples = this_db.group.all_samples_ordered()
+            this_sample_data = this_trait.data
+
+            corr_result_row = []
+            pca_corr_result_row = []
+            is_spearman = False #ZS: To determine if it's above or below the diagonal
+            for target in self.trait_list:
+                target_trait = target[0]
+                target_db = target[1]
+                target_samples = target_db.group.all_samples_ordered()
+                target_sample_data = target_trait.data
 
                 this_trait_vals = []
-                for index, sample in enumerate(this_db_samples):
-                    if (sample in this_sample_data) and (sample in self.shared_samples_list):
+                target_vals = []
+                for index, sample in enumerate(target_samples):
+                    if (sample in this_sample_data) and (sample in target_sample_data):
                         sample_value = this_sample_data[sample].value
+                        target_sample_value = target_sample_data[sample].value
                         this_trait_vals.append(sample_value)
-                self.trait_data_array.append(this_trait_vals)
+                        target_vals.append(target_sample_value)
+                    else:
+                        if sample in self.shared_samples_list:
+                            self.shared_samples_list.remove(sample)
 
-            corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
-            corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
+                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)
 
-            groups = []
-            for sample in self.all_sample_list:
-                groups.append(1)
-
-            try:
-                if self.do_PCA == True:
-                    self.pca_works = "True"
-                    self.pca_trait_ids = []
-                    pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
-                    self.loadings_array = self.process_loadings()
+                if num_overlap < self.lowest_overlap:
+                    self.lowest_overlap = num_overlap
+                if num_overlap < 2:
+                    corr_result_row.append([target_trait, 0, num_overlap])
+                    pca_corr_result_row.append(0)
                 else:
-                    self.pca_works = "False"
-            except:
-                self.pca_works = "False"
+                    pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
+                    if is_spearman == False:
+                        sample_r, sample_p = pearson_r, pearson_p
+                        if sample_r == 1:
+                            is_spearman = True
+                    else:
+                        sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
+
+                    corr_result_row.append([target_trait, sample_r, num_overlap])
+                    pca_corr_result_row.append(pearson_r)
 
-            self.js_data = dict(traits = [trait.name for trait in self.traits],
-                                groups = groups,
-                                cols = list(range(len(self.traits))),
-                                rows = list(range(len(self.traits))),
-                                samples = self.all_sample_list,
-                                sample_data = self.sample_data,)
-            #                    corr_results = [result[1] for result in result_row for result_row in self.corr_results])
+            self.corr_results.append(corr_result_row)
+            self.pca_corr_results.append(pca_corr_result_row)
+
+        self.trait_data_array = []
+        for trait_db in self.trait_list:
+            this_trait = trait_db[0]
+            this_db = trait_db[1]
+            this_db_samples = this_db.group.all_samples_ordered()
+            this_sample_data = this_trait.data
+
+            this_trait_vals = []
+            for index, sample in enumerate(this_db_samples):
+                if (sample in this_sample_data) and (sample in self.shared_samples_list):
+                    sample_value = this_sample_data[sample].value
+                    this_trait_vals.append(sample_value)
+            self.trait_data_array.append(this_trait_vals)
+
+        corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
+        corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)
+
+        groups = []
+        for sample in self.all_sample_list:
+            groups.append(1)
+
+        try:
+            if self.do_PCA == True:
+                self.pca_works = "True"
+                self.pca_trait_ids = []
+                pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
+                self.loadings_array = self.process_loadings()
+            else:
+                self.pca_works = "False"
+        except:
+            self.pca_works = "False"
+
+        self.js_data = dict(traits = [trait.name for trait in self.traits],
+                            groups = groups,
+                            cols = list(range(len(self.traits))),
+                            rows = list(range(len(self.traits))),
+                            samples = self.all_sample_list,
+                            sample_data = self.sample_data,)
 
     def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
         base = importr('base')
-- 
cgit 1.4.1