author     Alexander Kabui    2021-11-16 14:41:41 +0300
committer  Alexander Kabui    2021-11-16 14:41:41 +0300
commit     04452c274d51621a0cab1b8dce5b8101c69496b6 (patch)
tree       85dee26ccd3981b9ce6515617ab178b64e254b2d
parent     aab6393dd60872a6a3b6e7db2a7c087c4ec41295 (diff)
download   genenetwork2-04452c274d51621a0cab1b8dce5b8101c69496b6.tar.gz
refactor: fix the query; modify the cache point
-rw-r--r--   wqflask/base/data_set.py   35
1 file changed, 18 insertions(+), 17 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 70c58136..a3a720ad 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -747,7 +747,9 @@ class DataSet:
             and Species.name = '{}'
             """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
         results = dict(g.db.execute(query).fetchall())
-        sample_ids = [results[item] for item in self.samplelist]
+        sample_ids = [results.get(item) for item in self.samplelist]
+
+        sample_ids = [ids for ids in sample_ids if ids is not None]
 
         # MySQL limits the number of tables that can be used in a join to 61,
         # so we break the sample ids into smaller chunks
@@ -800,25 +802,22 @@ class DataSet:
                 results = g.db.execute(query).fetchall()
                 trait_sample_data.append([list(result) for result in results])
 
-            cache_dataset_results(
-                self.name, self.type, trait_sample_data)
+            trait_count = len(trait_sample_data[0])
+            self.trait_data = collections.defaultdict(list)
 
-        else:
-            trait_sample_data = cached_results
-
-        trait_count = len(trait_sample_data[0])
-        self.trait_data = collections.defaultdict(list)
-
-        # put all of the separate data together into a dictionary where the keys are
-        # trait names and values are lists of sample values
-        data_start_pos = 1
-        for trait_counter in range(trait_count):
-            trait_name = trait_sample_data[0][trait_counter][0]
-            for chunk_counter in range(int(number_chunks)):
-                self.trait_data[trait_name] += (
+            data_start_pos = 1
+            for trait_counter in range(trait_count):
+                trait_name = trait_sample_data[0][trait_counter][0]
+                for chunk_counter in range(int(number_chunks)):
+                    self.trait_data[trait_name] += (
                     trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
 
+            cache_dataset_results(
+                self.name, self.type, self.trait_data)
+
+        else:
+            self.trait_data = cached_results
 
 
 class PhenotypeDataSet(DataSet):
     DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
@@ -1282,7 +1281,9 @@ def generate_hash_file(dataset_name: str, dataset_timestamp: str):
 
 def cache_dataset_results(dataset_name: str, dataset_type: str, query_results: List):
-    """function to cache dataset query results to file"""
+    """function to cache dataset query results to file
+    input dataset_name and type query_results(already processed in default dict format)
+    """
 
     # data computations actions
 
     # store the file path on redis
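For illustration, below is a minimal, self-contained Python sketch of the two behaviours this commit changes: tolerating sample names that are missing from the query results, and assembling the per-trait dictionary before it is cached. The helper names lookup_sample_ids and build_trait_data are illustrative stand-ins, not identifiers from the repository.

# A minimal sketch (assumed helper names, not code from the repository)
# of the two behaviours this commit changes.
import collections


def lookup_sample_ids(results, samplelist):
    # Pre-patch, results[item] raised KeyError when a sample name was
    # absent from the query results; post-patch, missing names yield
    # None via dict.get() and are then filtered out.
    sample_ids = [results.get(item) for item in samplelist]
    return [ids for ids in sample_ids if ids is not None]


def build_trait_data(trait_sample_data, number_chunks):
    # Assemble the per-trait dictionary from chunked query results.
    # Each chunk is a list of rows shaped [trait_name, value, ...];
    # column 0 holds the trait name, hence data_start_pos = 1.
    trait_count = len(trait_sample_data[0])
    trait_data = collections.defaultdict(list)
    data_start_pos = 1
    for trait_counter in range(trait_count):
        trait_name = trait_sample_data[0][trait_counter][0]
        for chunk_counter in range(int(number_chunks)):
            trait_data[trait_name] += (
                trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
    return trait_data


if __name__ == "__main__":
    # The missing sample "s2" no longer raises KeyError.
    assert lookup_sample_ids({"s1": 10, "s3": 30}, ["s1", "s2", "s3"]) == [10, 30]

    # Two chunks of query results covering the same two traits.
    chunks = [
        [["traitA", 1.0, 2.0], ["traitB", 3.0, 4.0]],
        [["traitA", 5.0], ["traitB", 6.0]],
    ]
    assert build_trait_data(chunks, 2) == {
        "traitA": [1.0, 2.0, 5.0],
        "traitB": [3.0, 4.0, 6.0],
    }

The moved cache point matters: by caching self.trait_data (the processed defaultdict) instead of the raw chunked rows, a cache hit can be assigned directly to self.trait_data, so the assembly loop is skipped entirely on subsequent requests.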