From 0718d987f1b15838d45a9c414ec6d2318ed65e90 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 31 Aug 2022 01:41:10 +0300 Subject: add function to fetch probeset text files --- wqflask/wqflask/correlation/pre_computes.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index cb2f4470..95047fc7 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -167,3 +167,14 @@ def get_datasets_data(base_dataset, target_dataset_data): samples_fetched, base_traits_data) return (target_results, base_results) + +def fetch_text_file(dataset_name, text_dir, conn): + """fetch textfiles with strain vals if exists""" + with conn.cursor() as cursor: + query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name + cursor.execute(query) + results = cursor.fetchone() + if (results): + for file in os.listdir(text_dir): + if file.startswith(f"ProbeSetFreezeId_{results[0]}_"): + return os.path.join(text_dir, file) -- cgit v1.2.3 From 6652c3137ec477e02c8291ee6154f24cc3a4cef2 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 31 Aug 2022 01:43:33 +0300 Subject: read and parse text files --- wqflask/wqflask/correlation/pre_computes.py | 44 ++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index 95047fc7..eb089a03 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -4,6 +4,7 @@ import hashlib from pathlib import Path from base.data_set import query_table_timestamp +from base.webqtlConfig import TEXTDIR from base.webqtlConfig import TMPDIR from json.decoder import JSONDecodeError @@ -168,13 +169,48 @@ def get_datasets_data(base_dataset, target_dataset_data): return (target_results, base_results) -def fetch_text_file(dataset_name, text_dir, conn): + +def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR): """fetch textfiles with strain vals if exists""" + with conn.cursor() as cursor: query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name cursor.execute(query) results = cursor.fetchone() if (results): - for file in os.listdir(text_dir): - if file.startswith(f"ProbeSetFreezeId_{results[0]}_"): - return os.path.join(text_dir, file) + try: + for file in os.listdir(text_dir): + if file.startswith(f"ProbeSetFreezeId_{results[0]}_"): + return os.path.join(text_dir, file) + except FileNotFoundError: + pass + + +def read_text_file(sample_dict, file_path): + + def parse_line_csv(line): + return_list = line.split('","') + return_list[-1] = return_list[-1][:-2] + return_list[0] = return_list[0][1:] + return return_list + + def __fetch_id_positions__(all_ids, target_ids): + _vals = [] + _posit = [0] # alternative for parsing + + for (idx, strain) in enumerate(all_ids, 1): + if strain in target_ids: + _vals.append(target_ids[strain]) + _posit.append(idx) + + else: + _vals.append("") # todo;modify x_vals to take string rust + + return (_posit, _vals) + with open(file_path, "r") as file_handler: + all_ids = file_handler.readline() + _posit, sample_vals = __fetch_id_positions__( + parse_line_csv(all_ids)[1:], sample_dict) + + return (sample_vals, [",".join(parse_line_csv(line)) + for line in file_handler.readlines()]) -- cgit v1.2.3 From 9ee7efd8b18d8caece9cf8d3cc1d58ecbcf209a6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 31 Aug 2022 16:10:09 +0300 Subject: add new environment variable:TEXTDIR --- wqflask/base/webqtlConfig.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 39947158..371a94ab 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -7,7 +7,7 @@ # with those in utility/tools.py # ######################################### - +import os from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR # Debug Level @@ -95,6 +95,9 @@ if not valid_path(JSON_GENODIR): # fall back on old location (move the dir, FIXME) JSON_GENODIR = flat_files('json') + +TEXTDIR = os.path.join(os.environ.get( + "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix") # Are we using the following...? PORTADDR = "http://50.16.251.170" INFOPAGEHREF = '/dbdoc/%s.html' -- cgit v1.2.3 From d6c8505606b7b9fa1e41b3ee8bca303a0f8d6597 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 31 Aug 2022 18:02:35 +0300 Subject: integrate text files --- wqflask/wqflask/correlation/rust_correlation.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py index 5109c72e..f06ee95c 100644 --- a/wqflask/wqflask/correlation/rust_correlation.py +++ b/wqflask/wqflask/correlation/rust_correlation.py @@ -8,6 +8,8 @@ from wqflask.correlation.correlation_functions\ from wqflask.correlation.correlation_gn3_api import create_target_this_trait from wqflask.correlation.correlation_gn3_api import lit_for_trait_list from wqflask.correlation.correlation_gn3_api import do_lit_correlation +from wqflask.correlation.pre_computes import fetch_text_file +from wqflask.correlation.pre_computes import read_text_file from gn3.computations.correlations import compute_all_lit_correlation from gn3.computations.rust_correlation import run_correlation from gn3.computations.rust_correlation import get_sample_corr_data @@ -210,6 +212,15 @@ def __compute_sample_corr__( sample_data = get_sample_corr_data( sample_type=start_vars["corr_samples_group"], all_samples=all_samples, dataset_samples=this_dataset.group.all_samples_ordered()) + + if target_dataset.type == "ProbeSet": + with database_connector() as conn: + file_path = fetch_text_file(target_dataset.name, conn) + if file_path: + (sample_vals, target_data) = read_text_file( + sample_data, file_path) + return run_correlation(target_data, sample_vals, method, ",", corr_type, n_top) + target_dataset.get_trait_data(list(sample_data.keys())) target_data = [] -- cgit v1.2.3 From f24df16b8629d9c4f869c0ccdaa245e8ba7a0b2e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 7 Sep 2022 00:13:34 +0300 Subject: filter list vals with index --- wqflask/wqflask/correlation/pre_computes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index eb089a03..aacc3071 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -194,6 +194,10 @@ def read_text_file(sample_dict, file_path): return_list[0] = return_list[0][1:] return return_list + def filter_line_with_index(line, index): + lst = parse_line_csv(line) + return ",".join([lst[i] for i in index]) + def __fetch_id_positions__(all_ids, target_ids): _vals = [] _posit = [0] # alternative for parsing @@ -203,14 +207,10 @@ def read_text_file(sample_dict, file_path): _vals.append(target_ids[strain]) _posit.append(idx) - else: - _vals.append("") # todo;modify x_vals to take string rust - return (_posit, _vals) with open(file_path, "r") as file_handler: all_ids = file_handler.readline() _posit, sample_vals = __fetch_id_positions__( parse_line_csv(all_ids)[1:], sample_dict) - - return (sample_vals, [",".join(parse_line_csv(line)) + return (sample_vals, [filter_line_with_index(line, _posit) for line in file_handler.readlines()]) -- cgit v1.2.3 From d6cd1e457e059eb7aa2b0a93b481e643fab2cfd6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 7 Sep 2022 19:38:59 +0300 Subject: use csv to parse files --- wqflask/wqflask/correlation/pre_computes.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index aacc3071..b8a78a45 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -1,3 +1,4 @@ +import csv import json import os import hashlib @@ -174,10 +175,10 @@ def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR): """fetch textfiles with strain vals if exists""" with conn.cursor() as cursor: - query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name - cursor.execute(query) + cursor.execute( + 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name) results = cursor.fetchone() - if (results): + if results: try: for file in os.listdir(text_dir): if file.startswith(f"ProbeSetFreezeId_{results[0]}_"): @@ -188,16 +189,6 @@ def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR): def read_text_file(sample_dict, file_path): - def parse_line_csv(line): - return_list = line.split('","') - return_list[-1] = return_list[-1][:-2] - return_list[0] = return_list[0][1:] - return return_list - - def filter_line_with_index(line, index): - lst = parse_line_csv(line) - return ",".join([lst[i] for i in index]) - def __fetch_id_positions__(all_ids, target_ids): _vals = [] _posit = [0] # alternative for parsing @@ -208,9 +199,9 @@ def read_text_file(sample_dict, file_path): _posit.append(idx) return (_posit, _vals) - with open(file_path, "r") as file_handler: - all_ids = file_handler.readline() + + with open(file_path) as csv_file: + csv_reader = csv.reader(csv_file, delimiter=',') _posit, sample_vals = __fetch_id_positions__( - parse_line_csv(all_ids)[1:], sample_dict) - return (sample_vals, [filter_line_with_index(line, _posit) - for line in file_handler.readlines()]) + next(csv_reader)[1:], sample_dict) + return (sample_vals, [",".join([line[i] for i in _posit]) for line in csv_reader]) -- cgit v1.2.3 From 551fd9867b924b6cee5f9030a421bc22dd87c2cd Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 7 Sep 2022 20:49:46 +0300 Subject: catch general errors this will recreate the file --- wqflask/base/data_set.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 211c6752..72906515 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1321,5 +1321,6 @@ def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List) with open(file_path, "r") as file_handler: return json.load(file_handler) - except FileNotFoundError: + + except Exception: pass -- cgit v1.2.3 From d467d314273b36189dd5936062f683fab57986f4 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 8 Sep 2022 13:59:27 +0300 Subject: fixup query formatting --- wqflask/wqflask/correlation/pre_computes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index b8a78a45..1c52a0f5 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -175,8 +175,7 @@ def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR): """fetch textfiles with strain vals if exists""" with conn.cursor() as cursor: - cursor.execute( - 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % dataset_name) + cursor.execute('SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,)) results = cursor.fetchone() if results: try: -- cgit v1.2.3