about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexander Kabui 2022-02-25 17:33:19 +0300
committer Alexander Kabui 2022-02-25 17:33:19 +0300
commit d5d0e0f3c271e056057c0311083ab3684ccc4386 (patch)
tree f2dae433f1b5181419dc1f9b0e0d2c3624137345
parent 0ee723d14957c01162a67f4f6b99a25d43908b5b (diff)
download genenetwork2-d5d0e0f3c271e056057c0311083ab3684ccc4386.tar.gz
integrating generating temp dataset for pca
-rw-r--r-- wqflask/wqflask/correlation_matrix/show_corr_matrix.py 63
1 files changed, 32 insertions, 31 deletions
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index bcd73436..d5ec738b 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -19,7 +19,6 @@
# This module is used by GeneNetwork project (www.genenetwork.org)
import datetime
-import math
import random
import string
@@ -29,9 +28,6 @@ import scipy
from base import data_set
from base.webqtlConfig import GENERATED_TEXT_DIR
-from functools import reduce
-from functools import cmp_to_key
-from utility import webqtlUtil
from utility import helper_functions
from utility import corr_result_helpers
from utility.redis_tools import get_redis_conn
@@ -42,6 +38,8 @@ from gn3.computations.principal_component_analysis import compute_pca
from gn3.computations.principal_component_analysis import process_factor_loadings_tdata
from gn3.computations.principal_component_analysis import generate_pca_traits_vals
+from gn3.computations.principal_component_analysis import generate_pca_temp_dataset
+from gn3.computations.principal_component_analysis import cache_pca_dataset
Redis = get_redis_conn()
THIRTY_DAYS = 60 * 60 * 24 * 30
@@ -171,11 +169,11 @@ class CorrelationMatrix:
self.pca_works = "False"
try:
+
if self.do_PCA == True:
self.pca_works = "True"
self.pca_trait_ids = []
- pca = self.calculate_pca(
- list(range(len(self.traits))))
+ pca = self.calculate_pca()
self.loadings_array = process_factor_loadings_tdata(self.loadings,len(self.trait_list))
else:
self.pca_works = "False"
@@ -189,7 +187,7 @@ class CorrelationMatrix:
samples=self.all_sample_list,
sample_data=self.sample_data,)
- def calculate_pca(self, cols):
+ def calculate_pca(self):
pca = compute_pca(self.pca_corr_results)
@@ -197,36 +195,37 @@ class CorrelationMatrix:
self.loadings = pca["components"]
self.scores = pca["scores"]
- trait_array_vectors = generate_pca_traits_vals(self.trait_data_array,self.pca_corr_results)
-
-
-
- pca_traits = []
- for i, vector in enumerate(trait_array_vectors):
- # ZS: Check if below check is necessary
- # if corr_eigen_value[i-1] > 100.0/len(self.trait_list):
- pca_traits.append((vector * -1.0).tolist())
this_group_name = self.trait_list[0][1].group.name
temp_dataset = data_set.create_dataset(
dataset_name="Temp", dataset_type="Temp", group_name=this_group_name)
temp_dataset.group.get_samplelist()
- for i, pca_trait in enumerate(pca_traits):
- trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \
- this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
- this_vals_string = ""
- position = 0
- for sample in temp_dataset.group.all_samples_ordered():
- if sample in self.shared_samples_list:
- this_vals_string += str(pca_trait[position])
- this_vals_string += " "
- position += 1
- else:
- this_vals_string += "x "
- this_vals_string = this_vals_string[:-1]
- Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS)
- self.pca_trait_ids.append(trait_id)
+
+ species = temp_dataset.group.species
+
+ group =this_group_name
+
+ trait_data_array = self.trait_data_array
+
+ pca_corr = self.pca_corr_results
+
+ sample_list = temp_dataset.group.all_samples_ordered()
+
+
+ shared = self.shared_samples_list
+
+ dt_time = datetime.datetime.now().strftime("%m%d%H%M%S")
+
+
+
+ results = generate_pca_temp_dataset(species = species, group= group,traits_data = self.trait_data_array,corr_array = self.pca_corr_results,dataset_samples = sample_list, shared_samples=shared,create_time=dt_time)
+
+
+
+ cache_pca_dataset(Redis,THIRTY_DAYS,results)
+
+ self.pca_trait_ids = list(results.keys())
return pca
@@ -269,3 +268,5 @@ def export_corr_matrix(corr_results):
return corr_matrix_filename, matrix_export_path
+
+