From bbc75dcef80c3df600ab01c1804a27cdfdce1b80 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Nov 2021 02:51:44 +0300
Subject: init test for precomputing sample correlation

---
 wqflask/wqflask/correlation/pre_computes.py | 72 +++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 wqflask/wqflask/correlation/pre_computes.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
new file mode 100644
index 00000000..1db9f61b
--- /dev/null
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -0,0 +1,72 @@
+"""module contains the code to do the 
+precomputations of sample data between
+two entire datasets"""
+
+import json
+from typing import List
+from base import data_set
+
+from gn3.computations.correlations import fast_compute_all_sample_correlation
+from gn3.computations.correlations import map_shared_keys_to_values
+
+def get_dataset_dict_data(dataset_obj):
+    """Load the dataset's trait data and return that data mapped to key."""
+    dataset_obj.get_trait_data()  # runs first, before trait_data is read
+    sample_names, trait_values = dataset_obj.samplelist, dataset_obj.trait_data
+    return map_shared_keys_to_values(sample_names, trait_values)
+
+
+def fetch_datasets(base_dataset_name: str, target_dataset_name: str) -> List:
+    """Create the base and target datasets and fetch the traits of each.
+
+    Returns a two-item list, base first then target, each item being the
+    result of ``get_dataset_dict_data`` for that dataset.
+    """
+    # NOTE: does not work for temp (caveat carried over from the author)
+    datasets = [
+        data_set.create_dataset(dataset_name=name)
+        for name in (base_dataset_name, target_dataset_name)]
+    # Return a real list rather than a lazy, single-use ``map`` object so
+    # the value matches the ``List`` annotation and can be re-read.
+    return [get_dataset_dict_data(dataset) for dataset in datasets]
+
+
+# in the base dataset we just need the traits
+def pre_compute_sample_correlation(base_dataset: List,
+                                   target_dataset: List) -> List:
+    """Correlate each trait of the base dataset against the target dataset.
+
+    input: base_dataset (iterated trait by trait) and target_dataset
+    output: one "pearson" result per base trait, yielded lazily since
+    this is a generator function (the List annotation notwithstanding)
+
+    precaution: function is expensive (the author's note "targets only
+    Exon and" was left unfinished)
+    """
+    yield from (fast_compute_all_sample_correlation(corr_method="pearson",
+                                                    this_trait=base_trait,
+                                                    target_dataset=target_dataset)
+                for base_trait in base_dataset)
+
+
+def cache_to_file(base_dataset_name: str, target_dataset_name: str):
+    """Compute the base-vs-target sample correlations and dump them as JSON.
+
+    Both datasets are fetched by name, every base trait is correlated
+    against the target dataset, and the results are written to
+    "unique_file_name.json" in the current working directory.
+    """
+    # TODO: validate the datasets expiry first
+
+    base_dataset_data, target_dataset_data = [
+        list(dataset)
+        for dataset in fetch_datasets(base_dataset_name, target_dataset_name)]
+
+    # Compute before opening the file so that a failed computation does not
+    # leave behind a truncated or empty cache file.
+    dataset_correlation_results = list(pre_compute_sample_correlation(
+        base_dataset_data, target_dataset_data))
+
+    # json.dump has to run while the handle is still open.
+    with open("unique_file_name.json", "w") as file_handler:
+        json.dump(dataset_correlation_results, file_handler)
-- 
cgit v1.2.3