author      Alexander Kabui    2022-09-08 14:04:29 +0300
committer   GitHub             2022-09-08 14:04:29 +0300
commit      c9572f96f061a28b3c68f9572bd796ed560233aa (patch)
tree        fc19dcb2f454ab5ac01e9b986a6f75b4330d14c0
parent      583e308f99dbcacce6a3c544689cc4b57a00378f (diff)
parent      d467d314273b36189dd5936062f683fab57986f4 (diff)
download    genenetwork2-c9572f96f061a28b3c68f9572bd796ed560233aa.tar.gz
Merge pull request #727 from Alexanderlacuna/feature/use-textfiles
use text files for Probeset
-rw-r--r--  wqflask/base/data_set.py                          3
-rw-r--r--  wqflask/base/webqtlConfig.py                      5
-rw-r--r--  wqflask/wqflask/correlation/pre_computes.py      37
-rw-r--r--  wqflask/wqflask/correlation/rust_correlation.py  11
4 files changed, 54 insertions, 2 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 742eb61c..2f4c1154 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -1287,5 +1287,6 @@ def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List)
         with open(file_path, "r") as file_handler:
             return json.load(file_handler)
-    except FileNotFoundError:
+
+    except Exception:
         pass
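
The hunk above widens the handler in fetch_cached_results from FileNotFoundError to a bare Exception, so a missing, truncated, or otherwise unreadable cache file is silently skipped instead of raising. A minimal standalone sketch of that behaviour (illustrative only, not code from this patch; the helper name load_json_cache is made up):

    import json

    def load_json_cache(file_path):
        """Return cached JSON results, or None when the cache cannot be used."""
        try:
            with open(file_path, "r") as file_handler:
                return json.load(file_handler)
        except Exception:
            # Covers FileNotFoundError as before, and now also e.g.
            # json.JSONDecodeError from a truncated or corrupt cache file.
            return None
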
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 39947158..371a94ab 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -7,7 +7,7 @@
 # with those in utility/tools.py
 #
 #########################################
-
+import os
 from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR
 # Debug Level
@@ -95,6 +95,9 @@ if not valid_path(JSON_GENODIR):
     # fall back on old location (move the dir, FIXME)
     JSON_GENODIR = flat_files('json')
+
+TEXTDIR = os.path.join(os.environ.get(
+    "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix")
 # Are we using the following...?
 PORTADDR = "http://50.16.251.170"
 INFOPAGEHREF = '/dbdoc/%s.html'
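
With the lines added above, TEXTDIR points at the ProbeSetFreeze_DataMatrix directory under the GNSHARE root, defaulting to /gnshare/gn/ when the variable is unset. A quick sketch of how the path resolves (the GNSHARE value shown is an example, not taken from this patch):

    import os

    # GNSHARE unset -> /gnshare/gn/web/ProbeSetFreeze_DataMatrix
    print(os.path.join(os.environ.get(
        "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix"))

    # With e.g. GNSHARE=/home/gn2/shared exported, the same expression
    # resolves to /home/gn2/shared/web/ProbeSetFreeze_DataMatrix
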
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index cb2f4470..1c52a0f5 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -1,9 +1,11 @@
+import csv
 import json
 import os
 import hashlib
 from pathlib import Path
 from base.data_set import query_table_timestamp
+from base.webqtlConfig import TEXTDIR
 from base.webqtlConfig import TMPDIR
 from json.decoder import JSONDecodeError
@@ -167,3 +169,38 @@ def get_datasets_data(base_dataset, target_dataset_data):
         samples_fetched, base_traits_data)
     return (target_results, base_results)
+
+
+def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
+    """fetch textfiles with strain vals if exists"""
+
+    with conn.cursor() as cursor:
+        cursor.execute('SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,))
+        results = cursor.fetchone()
+        if results:
+            try:
+                for file in os.listdir(text_dir):
+                    if file.startswith(f"ProbeSetFreezeId_{results[0]}_"):
+                        return os.path.join(text_dir, file)
+            except FileNotFoundError:
+                pass
+
+
+def read_text_file(sample_dict, file_path):
+
+    def __fetch_id_positions__(all_ids, target_ids):
+        _vals = []
+        _posit = [0]  # alternative for parsing
+
+        for (idx, strain) in enumerate(all_ids, 1):
+            if strain in target_ids:
+                _vals.append(target_ids[strain])
+                _posit.append(idx)
+
+        return (_posit, _vals)
+
+    with open(file_path) as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=',')
+        _posit, sample_vals = __fetch_id_positions__(
+            next(csv_reader)[1:], sample_dict)
+        return (sample_vals, [",".join([line[i] for i in _posit]) for line in csv_reader])
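
read_text_file appears to assume a plain CSV matrix: a header row whose first cell is a label followed by strain names, then one row per trait with values in the same column order. The sketch below mirrors its positional filtering on a made-up file; the strain names, values, and sample_dict contents are illustrative, not taken from GeneNetwork data:

    import csv
    import io

    # Assumed layout of a ProbeSetFreezeId_<Id>_*.txt file (illustrative):
    text = ("ProbeSetId,BXD1,BXD2,BXD5\n"
            "100001_at,9.3,8.7,10.1\n"
            "100002_at,7.2,7.9,6.5\n")

    sample_dict = {"BXD1": "9.0", "BXD5": "10.5"}   # user-selected samples -> base trait values

    reader = csv.reader(io.StringIO(text), delimiter=',')
    header = next(reader)[1:]                       # ['BXD1', 'BXD2', 'BXD5']
    positions = [0] + [i for i, s in enumerate(header, 1) if s in sample_dict]
    sample_vals = [sample_dict[s] for s in header if s in sample_dict]

    # Keep only the id column plus the selected samples' columns:
    rows = [",".join(line[i] for i in positions) for line in reader]
    # rows        == ['100001_at,9.3,10.1', '100002_at,7.2,6.5']
    # sample_vals == ['9.0', '10.5']
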
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index cfa360d0..251ada7b 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -8,6 +8,8 @@ from wqflask.correlation.correlation_functions\
 from wqflask.correlation.correlation_gn3_api import create_target_this_trait
 from wqflask.correlation.correlation_gn3_api import lit_for_trait_list
 from wqflask.correlation.correlation_gn3_api import do_lit_correlation
+from wqflask.correlation.pre_computes import fetch_text_file
+from wqflask.correlation.pre_computes import read_text_file
 from gn3.computations.correlations import compute_all_lit_correlation
 from gn3.computations.rust_correlation import run_correlation
 from gn3.computations.rust_correlation import get_sample_corr_data
@@ -219,6 +221,15 @@ def __compute_sample_corr__(
     sample_data = get_sample_corr_data(
         sample_type=start_vars["corr_samples_group"], all_samples=all_samples,
         dataset_samples=this_dataset.group.all_samples_ordered())
+
+    if target_dataset.type == "ProbeSet":
+        with database_connector() as conn:
+            file_path = fetch_text_file(target_dataset.name, conn)
+            if file_path:
+                (sample_vals, target_data) = read_text_file(
+                    sample_data, file_path)
+                return run_correlation(target_data, sample_vals, method, ",", corr_type, n_top)
+
     target_dataset.get_trait_data(list(sample_data.keys()))
     target_data = []
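
Taken together, the new branch in __compute_sample_corr__ tries the text-file route first for ProbeSet target datasets and only falls back to target_dataset.get_trait_data() when no matrix file is found. A rough helper-style sketch of that call order (the helper itself is not part of the patch; the caller supplies a live DB connection, as the patch does via database_connector()):

    from wqflask.correlation.pre_computes import fetch_text_file, read_text_file

    def probeset_target_from_textfile(dataset_name, sample_data, conn):
        """Illustrative only: return (sample_vals, target_rows) from the
        text-file cache, or None so the caller falls back to SQL fetching."""
        file_path = fetch_text_file(dataset_name, conn)
        if file_path:
            return read_text_file(sample_data, file_path)
        return None
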