author      Alexander Kabui    2022-09-08 14:04:29 +0300
committer   GitHub             2022-09-08 14:04:29 +0300
commit      c9572f96f061a28b3c68f9572bd796ed560233aa (patch)
tree        fc19dcb2f454ab5ac01e9b986a6f75b4330d14c0
parent      583e308f99dbcacce6a3c544689cc4b57a00378f (diff)
parent      d467d314273b36189dd5936062f683fab57986f4 (diff)
download    genenetwork2-c9572f96f061a28b3c68f9572bd796ed560233aa.tar.gz
Merge pull request #727 from Alexanderlacuna/feature/use-textfiles
use text files for Probeset
-rw-r--r--  wqflask/base/data_set.py                          3
-rw-r--r--  wqflask/base/webqtlConfig.py                      5
-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py      37
-rw-r--r--  wqflask/wqflask/correlation/rust_correlation.py  11
4 files changed, 54 insertions, 2 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 742eb61c..2f4c1154 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -1287,5 +1287,6 @@ def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List)
         with open(file_path, "r") as file_handler:
             return json.load(file_handler)
-    except FileNotFoundError:
+
+    except Exception:
         pass
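
The hunk above widens the handler in fetch_cached_results from FileNotFoundError to a bare Exception, so a missing, truncated, or otherwise unreadable cache file is silently skipped instead of raising. A minimal standalone sketch of that behaviour (illustrative only, not code from this patch; the helper name load_json_cache is made up):

    import json

    def load_json_cache(file_path):
        """Return cached JSON results, or None when the cache cannot be used."""
        try:
            with open(file_path, "r") as file_handler:
                return json.load(file_handler)
        except Exception:
            # Covers FileNotFoundError as before, and now also e.g.
            # json.JSONDecodeError from a truncated or corrupt cache file.
            return None
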
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 39947158..371a94ab 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -7,7 +7,7 @@
 # with those in utility/tools.py
 #
 #########################################
-
+import os
 from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR
 # Debug Level
@@ -95,6 +95,9 @@ if not valid_path(JSON_GENODIR):
     # fall back on old location (move the dir, FIXME)
     JSON_GENODIR = flat_files('json')
+
+TEXTDIR = os.path.join(os.environ.get(
+    "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix")
 # Are we using the following...?
 PORTADDR = "http://50.16.251.170"
 INFOPAGEHREF = '/dbdoc/%s.html'
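
With the lines added above, TEXTDIR points at the ProbeSetFreeze_DataMatrix directory under the GNSHARE root, defaulting to /gnshare/gn/ when the variable is unset. A quick sketch of how the path resolves (the GNSHARE value shown is an example, not taken from this patch):

    import os

    # GNSHARE unset -> /gnshare/gn/web/ProbeSetFreeze_DataMatrix
    print(os.path.join(os.environ.get(
        "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix"))

    # With e.g. GNSHARE=/home/gn2/shared exported, the same expression
    # resolves to /home/gn2/shared/web/ProbeSetFreeze_DataMatrix
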
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index cb2f4470..1c52a0f5 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -1,9 +1,11 @@
+import csv
 import json
 import os
 import hashlib
 from pathlib import Path
 from base.data_set import query_table_timestamp
+from base.webqtlConfig import TEXTDIR
 from base.webqtlConfig import TMPDIR
 from json.decoder import JSONDecodeError
@@ -167,3 +169,38 @@ def get_datasets_data(base_dataset, target_dataset_data):
         samples_fetched, base_traits_data)
     return (target_results, base_results)
+
+
+def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
+    """fetch textfiles with strain vals if exists"""
+
+    with conn.cursor() as cursor:
+        cursor.execute('SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,))
+        results = cursor.fetchone()
+        if results:
+            try:
+                for file in os.listdir(text_dir):
+                    if file.startswith(f"ProbeSetFreezeId_{results[0]}_"):
+                        return os.path.join(text_dir, file)
+            except FileNotFoundError:
+                pass
+
+
+def read_text_file(sample_dict, file_path):
+
+    def __fetch_id_positions__(all_ids, target_ids):
+        _vals = []
+        _posit = [0]  # alternative for parsing
+
+        for (idx, strain) in enumerate(all_ids, 1):
+            if strain in target_ids:
+                _vals.append(target_ids[strain])
+                _posit.append(idx)
+
+        return (_posit, _vals)
+
+    with open(file_path) as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=',')
+        _posit, sample_vals = __fetch_id_positions__(
+            next(csv_reader)[1:], sample_dict)
+        return (sample_vals, [",".join([line[i] for i in _posit]) for line in csv_reader])
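
read_text_file appears to assume a plain CSV matrix: a header row whose first cell is a label followed by strain names, then one row per trait with values in the same column order. The sketch below mirrors its positional filtering on a made-up file; the strain names, values, and sample_dict contents are illustrative, not taken from GeneNetwork data:

    import csv
    import io

    # Assumed layout of a ProbeSetFreezeId_<Id>_*.txt file (illustrative):
    text = ("ProbeSetId,BXD1,BXD2,BXD5\n"
            "100001_at,9.3,8.7,10.1\n"
            "100002_at,7.2,7.9,6.5\n")

    sample_dict = {"BXD1": "9.0", "BXD5": "10.5"}   # user-selected samples -> base trait values

    reader = csv.reader(io.StringIO(text), delimiter=',')
    header = next(reader)[1:]                       # ['BXD1', 'BXD2', 'BXD5']
    positions = [0] + [i for i, s in enumerate(header, 1) if s in sample_dict]
    sample_vals = [sample_dict[s] for s in header if s in sample_dict]

    # Keep only the id column plus the selected samples' columns:
    rows = [",".join(line[i] for i in positions) for line in reader]
    # rows        == ['100001_at,9.3,10.1', '100002_at,7.2,6.5']
    # sample_vals == ['9.0', '10.5']
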
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index cfa360d0..251ada7b 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -8,6 +8,8 @@ from wqflask.correlation.correlation_functions\
 from wqflask.correlation.correlation_gn3_api import create_target_this_trait
 from wqflask.correlation.correlation_gn3_api import lit_for_trait_list
 from wqflask.correlation.correlation_gn3_api import do_lit_correlation
+from wqflask.correlation.pre_computes import fetch_text_file
+from wqflask.correlation.pre_computes import read_text_file
 from gn3.computations.correlations import compute_all_lit_correlation
 from gn3.computations.rust_correlation import run_correlation
 from gn3.computations.rust_correlation import get_sample_corr_data
@@ -219,6 +221,15 @@ def __compute_sample_corr__(
     sample_data = get_sample_corr_data(
         sample_type=start_vars["corr_samples_group"], all_samples=all_samples,
         dataset_samples=this_dataset.group.all_samples_ordered())
+
+    if target_dataset.type == "ProbeSet":
+        with database_connector() as conn:
+            file_path = fetch_text_file(target_dataset.name, conn)
+            if file_path:
+                (sample_vals, target_data) = read_text_file(
+                    sample_data, file_path)
+                return run_correlation(target_data, sample_vals, method, ",", corr_type, n_top)
+
     target_dataset.get_trait_data(list(sample_data.keys()))
     target_data = []
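
Taken together, the new branch in __compute_sample_corr__ tries the text-file route first for ProbeSet target datasets and only falls back to target_dataset.get_trait_data() when no matrix file is found. A rough helper-style sketch of that call order (the helper itself is not part of the patch; the caller supplies a live DB connection, as the patch does via database_connector()):

    from wqflask.correlation.pre_computes import fetch_text_file, read_text_file

    def probeset_target_from_textfile(dataset_name, sample_data, conn):
        """Illustrative only: return (sample_vals, target_rows) from the
        text-file cache, or None so the caller falls back to SQL fetching."""
        file_path = fetch_text_file(dataset_name, conn)
        if file_path:
            return read_text_file(sample_data, file_path)
        return None
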