diff options
author | Alexander Kabui | 2022-09-08 14:04:29 +0300 |
---|---|---|
committer | GitHub | 2022-09-08 14:04:29 +0300 |
commit | c9572f96f061a28b3c68f9572bd796ed560233aa (patch) | |
tree | fc19dcb2f454ab5ac01e9b986a6f75b4330d14c0 /wqflask | |
parent | 583e308f99dbcacce6a3c544689cc4b57a00378f (diff) | |
parent | d467d314273b36189dd5936062f683fab57986f4 (diff) | |
download | genenetwork2-c9572f96f061a28b3c68f9572bd796ed560233aa.tar.gz |
Merge pull request #727 from Alexanderlacuna/feature/use-textfiles
use text files for Probeset
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/base/data_set.py | 3 | ||||
-rw-r--r-- | wqflask/base/webqtlConfig.py | 5 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/pre_computes.py | 37 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/rust_correlation.py | 11 |
4 files changed, 54 insertions, 2 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 742eb61c..2f4c1154 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1287,5 +1287,6 @@ def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List) with open(file_path, "r") as file_handler: return json.load(file_handler) - except FileNotFoundError: + + except Exception: pass diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 39947158..371a94ab 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -7,7 +7,7 @@ # with those in utility/tools.py # ######################################### - +import os from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR # Debug Level @@ -95,6 +95,9 @@ if not valid_path(JSON_GENODIR): # fall back on old location (move the dir, FIXME) JSON_GENODIR = flat_files('json') + +TEXTDIR = os.path.join(os.environ.get( + "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix") # Are we using the following...? PORTADDR = "http://50.16.251.170" INFOPAGEHREF = '/dbdoc/%s.html' diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index cb2f4470..1c52a0f5 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -1,9 +1,11 @@ +import csv import json import os import hashlib from pathlib import Path from base.data_set import query_table_timestamp +from base.webqtlConfig import TEXTDIR from base.webqtlConfig import TMPDIR from json.decoder import JSONDecodeError @@ -167,3 +169,38 @@ def get_datasets_data(base_dataset, target_dataset_data): samples_fetched, base_traits_data) return (target_results, base_results) + + +def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR): + """fetch textfiles with strain vals if exists""" + + with conn.cursor() as cursor: + cursor.execute('SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,)) + results = cursor.fetchone() + if results: + try: + for file in os.listdir(text_dir): + if file.startswith(f"ProbeSetFreezeId_{results[0]}_"): + return os.path.join(text_dir, file) + except FileNotFoundError: + pass + + +def read_text_file(sample_dict, file_path): + + def __fetch_id_positions__(all_ids, target_ids): + _vals = [] + _posit = [0] # alternative for parsing + + for (idx, strain) in enumerate(all_ids, 1): + if strain in target_ids: + _vals.append(target_ids[strain]) + _posit.append(idx) + + return (_posit, _vals) + + with open(file_path) as csv_file: + csv_reader = csv.reader(csv_file, delimiter=',') + _posit, sample_vals = __fetch_id_positions__( + next(csv_reader)[1:], sample_dict) + return (sample_vals, [",".join([line[i] for i in _posit]) for line in csv_reader]) diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py index cfa360d0..251ada7b 100644 --- a/wqflask/wqflask/correlation/rust_correlation.py +++ b/wqflask/wqflask/correlation/rust_correlation.py @@ -8,6 +8,8 @@ from wqflask.correlation.correlation_functions\ from wqflask.correlation.correlation_gn3_api import create_target_this_trait from wqflask.correlation.correlation_gn3_api import lit_for_trait_list from wqflask.correlation.correlation_gn3_api import do_lit_correlation +from wqflask.correlation.pre_computes import fetch_text_file +from wqflask.correlation.pre_computes import read_text_file from gn3.computations.correlations import compute_all_lit_correlation from gn3.computations.rust_correlation import run_correlation from gn3.computations.rust_correlation import get_sample_corr_data @@ -219,6 +221,15 @@ def __compute_sample_corr__( sample_data = get_sample_corr_data( sample_type=start_vars["corr_samples_group"], all_samples=all_samples, dataset_samples=this_dataset.group.all_samples_ordered()) + + if target_dataset.type == "ProbeSet": + with database_connector() as conn: + file_path = fetch_text_file(target_dataset.name, conn) + if file_path: + (sample_vals, target_data) = read_text_file( + sample_data, file_path) + return run_correlation(target_data, sample_vals, method, ",", corr_type, n_top) + target_dataset.get_trait_data(list(sample_data.keys())) target_data = [] |