From 04452c274d51621a0cab1b8dce5b8101c69496b6 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Tue, 16 Nov 2021 14:41:41 +0300
Subject: refactor: fix the query; modify the cache point

---
 wqflask/base/data_set.py | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 70c58136..a3a720ad 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -747,7 +747,9 @@ class DataSet:
                 and Species.name = '{}'
                 """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
             results = dict(g.db.execute(query).fetchall())
-            sample_ids = [results[item] for item in self.samplelist]
+            sample_ids = [results.get(item) for item in self.samplelist]
+
+            sample_ids = [ids for ids in sample_ids if ids is not None]

         # MySQL limits the number of tables that can be used in a join to 61,
         # so we break the sample ids into smaller chunks
@@ -800,25 +802,22 @@ class DataSet:
                 results = g.db.execute(query).fetchall()
                 trait_sample_data.append([list(result) for result in results])

-            cache_dataset_results(
-                self.name, self.type, trait_sample_data)
+            trait_count = len(trait_sample_data[0])
+            self.trait_data = collections.defaultdict(list)

-        else:
-            trait_sample_data = cached_results
-
-        trait_count = len(trait_sample_data[0])
-        self.trait_data = collections.defaultdict(list)
-
-        # put all of the separate data together into a dictionary where the keys are
-        # trait names and values are lists of sample values
-        data_start_pos = 1
-        for trait_counter in range(trait_count):
-            trait_name = trait_sample_data[0][trait_counter][0]
-            for chunk_counter in range(int(number_chunks)):
-                self.trait_data[trait_name] += (
+            data_start_pos = 1
+            for trait_counter in range(trait_count):
+                trait_name = trait_sample_data[0][trait_counter][0]
+                for chunk_counter in range(int(number_chunks)):
+                    self.trait_data[trait_name] += (
                     trait_sample_data[chunk_counter][trait_counter][data_start_pos:])

+            cache_dataset_results(
+                self.name, self.type, self.trait_data)
+
+        else:
+            self.trait_data = cached_results

 class PhenotypeDataSet(DataSet):
     DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
@@ -1282,7 +1281,9 @@ def generate_hash_file(dataset_name: str, dataset_timestamp: str):


 def cache_dataset_results(dataset_name: str, dataset_type: str, query_results: List):
-    """function to cache dataset query results to file"""
+    """Cache dataset query results to a file.
+    Takes dataset_name, dataset_type and query_results (already processed into defaultdict format).
+    """
     # data computations actions
     # store the file path on redis

--
cgit v1.2.3