From 0718d987f1b15838d45a9c414ec6d2318ed65e90 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 31 Aug 2022 01:41:10 +0300
Subject: add function to fetch probeset text files

---
 wqflask/wqflask/correlation/pre_computes.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index cb2f4470..95047fc7 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -167,3 +167,14 @@ def get_datasets_data(base_dataset, target_dataset_data):
         samples_fetched, base_traits_data)
 
     return (target_results, base_results)
+
+def fetch_text_file(dataset_name, text_dir, conn):
+    """fetch textfiles with strain vals if exists"""
+    with conn.cursor() as cursor:
+        query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name
+        cursor.execute(query)
+        results = cursor.fetchone()
+    if (results):
+        for file in os.listdir(text_dir):
+            if file.startswith(f"ProbeSetFreezeId_{results[0]}_"):
+                return os.path.join(text_dir, file)
-- 
cgit 1.4.1


From 6652c3137ec477e02c8291ee6154f24cc3a4cef2 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 31 Aug 2022 01:43:33 +0300
Subject: read and parse text files

---
 wqflask/wqflask/correlation/pre_computes.py | 44 ++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 95047fc7..eb089a03 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -4,6 +4,7 @@ import hashlib
 from pathlib import Path
 
 from base.data_set import query_table_timestamp
+from base.webqtlConfig import TEXTDIR
 from base.webqtlConfig import TMPDIR
 
 from json.decoder import JSONDecodeError
@@ -168,13 +169,48 @@ def get_datasets_data(base_dataset, target_dataset_data):
 
     return (target_results, base_results)
 
-def fetch_text_file(dataset_name, text_dir, conn):
+
+def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
     """fetch textfiles with strain vals if exists"""
+
     with conn.cursor() as cursor:
         query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name
         cursor.execute(query)
         results = cursor.fetchone()
     if (results):
-        for file in os.listdir(text_dir):
-            if file.startswith(f"ProbeSetFreezeId_{results[0]}_"):
-                return os.path.join(text_dir, file)
+        try:
+            for file in os.listdir(text_dir):
+                if file.startswith(f"ProbeSetFreezeId_{results[0]}_"):
+                    return os.path.join(text_dir, file)
+        except FileNotFoundError:
+            pass
+
+
+def read_text_file(sample_dict, file_path):
+
+    def parse_line_csv(line):
+        return_list = line.split('","')
+        return_list[-1] = return_list[-1][:-2]
+        return_list[0] = return_list[0][1:]
+        return return_list
+
+    def __fetch_id_positions__(all_ids, target_ids):
+        _vals = []
+        _posit = [0]  # alternative for parsing
+
+        for (idx, strain) in enumerate(all_ids, 1):
+            if strain in target_ids:
+                _vals.append(target_ids[strain])
+                _posit.append(idx)
+
+            else:
+                _vals.append("")  # todo;modify x_vals to take string rust
+
+        return (_posit, _vals)
+    with open(file_path, "r") as file_handler:
+        all_ids = file_handler.readline()
+        _posit, sample_vals = __fetch_id_positions__(
+            parse_line_csv(all_ids)[1:], sample_dict)
+
+        return (sample_vals, [",".join(parse_line_csv(line))
+                              for line in file_handler.readlines()])
-- 
cgit 1.4.1


From 9ee7efd8b18d8caece9cf8d3cc1d58ecbcf209a6 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 31 Aug 2022 16:10:09 +0300
Subject: add new config variable TEXTDIR (rooted at the GNSHARE environment variable)

---
 wqflask/base/webqtlConfig.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 39947158..371a94ab 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -7,7 +7,7 @@
 # with those in utility/tools.py
 #
 #########################################
-
+import os
 from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR
 
 # Debug Level
@@ -95,6 +95,9 @@ if not valid_path(JSON_GENODIR):
     # fall back on old location (move the dir, FIXME)
     JSON_GENODIR = flat_files('json')
 
+
+TEXTDIR = os.path.join(os.environ.get(
+    "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix")
 # Are we using the following...?
 PORTADDR = "http://50.16.251.170"
 INFOPAGEHREF = '/dbdoc/%s.html'
-- 
cgit 1.4.1


From d6c8505606b7b9fa1e41b3ee8bca303a0f8d6597 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 31 Aug 2022 18:02:35 +0300
Subject: integrate text files

---
 wqflask/wqflask/correlation/rust_correlation.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 5109c72e..f06ee95c 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -8,6 +8,8 @@ from wqflask.correlation.correlation_functions\
 from wqflask.correlation.correlation_gn3_api import create_target_this_trait
 from wqflask.correlation.correlation_gn3_api import lit_for_trait_list
 from wqflask.correlation.correlation_gn3_api import do_lit_correlation
+from wqflask.correlation.pre_computes import fetch_text_file
+from wqflask.correlation.pre_computes import read_text_file
 from gn3.computations.correlations import compute_all_lit_correlation
 from gn3.computations.rust_correlation import run_correlation
 from gn3.computations.rust_correlation import get_sample_corr_data
@@ -210,6 +212,15 @@ def __compute_sample_corr__(
     sample_data = get_sample_corr_data(
         sample_type=start_vars["corr_samples_group"], all_samples=all_samples,
         dataset_samples=this_dataset.group.all_samples_ordered())
+
+    if target_dataset.type == "ProbeSet":
+        with database_connector() as conn:
+            file_path = fetch_text_file(target_dataset.name, conn)
+            if file_path:
+                (sample_vals, target_data) = read_text_file(
+                    sample_data, file_path)
+                return run_correlation(target_data, sample_vals, method, ",", corr_type, n_top)
+
     target_dataset.get_trait_data(list(sample_data.keys()))
 
     target_data = []
-- 
cgit 1.4.1


From f24df16b8629d9c4f869c0ccdaa245e8ba7a0b2e Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 7 Sep 2022 00:13:34 +0300
Subject: filter list vals with index

---
 wqflask/wqflask/correlation/pre_computes.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index eb089a03..aacc3071 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -194,6 +194,10 @@ def read_text_file(sample_dict, file_path):
         return_list[0] = return_list[0][1:]
         return return_list
 
+    def filter_line_with_index(line, index):
+        lst = parse_line_csv(line)
+        return ",".join([lst[i] for i in index])
+
     def __fetch_id_positions__(all_ids, target_ids):
         _vals = []
         _posit = [0]  # alternative for parsing
@@ -203,14 +207,10 @@ def read_text_file(sample_dict, file_path):
                 _vals.append(target_ids[strain])
                 _posit.append(idx)
 
-            else:
-                _vals.append("")  # todo;modify x_vals to take string rust
-
         return (_posit, _vals)
     with open(file_path, "r") as file_handler:
         all_ids = file_handler.readline()
         _posit, sample_vals = __fetch_id_positions__(
             parse_line_csv(all_ids)[1:], sample_dict)
-
-        return (sample_vals, [",".join(parse_line_csv(line))
+        return (sample_vals, [filter_line_with_index(line, _posit)
                               for line in file_handler.readlines()])
-- 
cgit 1.4.1


From d6cd1e457e059eb7aa2b0a93b481e643fab2cfd6 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 7 Sep 2022 19:38:59 +0300
Subject: use csv to parse files

---
 wqflask/wqflask/correlation/pre_computes.py | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index aacc3071..b8a78a45 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -1,3 +1,4 @@
+import csv
 import json
 import os
 import hashlib
@@ -174,10 +175,10 @@ def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
     """fetch textfiles with strain vals if exists"""
 
     with conn.cursor() as cursor:
-        query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name
-        cursor.execute(query)
+        cursor.execute(
+            'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name)
         results = cursor.fetchone()
-    if (results):
+    if results:
         try:
             for file in os.listdir(text_dir):
                 if file.startswith(f"ProbeSetFreezeId_{results[0]}_"):
@@ -188,16 +189,6 @@ def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
 
 def read_text_file(sample_dict, file_path):
 
-    def parse_line_csv(line):
-        return_list = line.split('","')
-        return_list[-1] = return_list[-1][:-2]
-        return_list[0] = return_list[0][1:]
-        return return_list
-
-    def filter_line_with_index(line, index):
-        lst = parse_line_csv(line)
-        return ",".join([lst[i] for i in index])
-
     def __fetch_id_positions__(all_ids, target_ids):
         _vals = []
         _posit = [0]  # alternative for parsing
@@ -208,9 +199,9 @@ def read_text_file(sample_dict, file_path):
                 _posit.append(idx)
 
         return (_posit, _vals)
-    with open(file_path, "r") as file_handler:
-        all_ids = file_handler.readline()
+
+    with open(file_path) as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=',')
         _posit, sample_vals = __fetch_id_positions__(
-            parse_line_csv(all_ids)[1:], sample_dict)
-        return (sample_vals, [filter_line_with_index(line, _posit)
-                              for line in file_handler.readlines()])
+            next(csv_reader)[1:], sample_dict)
+        return (sample_vals, [",".join([line[i] for i in _posit]) for line in csv_reader])
-- 
cgit 1.4.1


From 551fd9867b924b6cee5f9030a421bc22dd87c2cd Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Wed, 7 Sep 2022 20:49:46 +0300
Subject: catch general errors; this will recreate the file

---
 wqflask/base/data_set.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 211c6752..72906515 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -1321,5 +1321,6 @@ def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List)
         with open(file_path, "r") as file_handler:
 
             return json.load(file_handler)
-    except FileNotFoundError:
+
+    except Exception:
         pass
-- 
cgit 1.4.1


From d467d314273b36189dd5936062f683fab57986f4 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui
Date: Thu, 8 Sep 2022 13:59:27 +0300
Subject: fixup query formatting

---
 wqflask/wqflask/correlation/pre_computes.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index b8a78a45..1c52a0f5 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -175,8 +175,7 @@ def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
     """fetch textfiles with strain vals if exists"""
 
     with conn.cursor() as cursor:
-        cursor.execute(
-            'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name)
+        cursor.execute('SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,))
         results = cursor.fetchone()
     if results:
         try:
-- 
cgit 1.4.1