about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander_Kabui 2023-04-18 19:24:28 +0300
committer Alexander_Kabui 2023-04-18 19:24:28 +0300
commit bd29507b356840f3b1742548502b774a520c78b3 (patch)
tree 76a7c3991e525068f6d35b5dafb106fe8c27cc5e
parent 66e8c558cfce8cab5db22fdc50fe56ef1036cf82 (diff)
download genenetwork2-bd29507b356840f3b1742548502b774a520c78b3.tar.gz
metadata caching code integration
-rw-r--r-- wqflask/wqflask/correlation/rust_correlation.py 37
1 files changed, 19 insertions, 18 deletions
diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py
index 67bd5ff5..41dd77a1 100644
--- a/wqflask/wqflask/correlation/rust_correlation.py
+++ b/wqflask/wqflask/correlation/rust_correlation.py
@@ -13,6 +13,8 @@ from wqflask.correlation.correlation_gn3_api import do_lit_correlation
from wqflask.correlation.pre_computes import fetch_text_file
from wqflask.correlation.pre_computes import read_text_file
from wqflask.correlation.pre_computes import write_db_to_textfile
+from wqflask.correlation.pre_computes import read_trait_metadata
+from wqflask.correlation.pre_computes import cache_trait_metadata
from gn3.computations.correlations import compute_all_lit_correlation
from gn3.computations.rust_correlation import run_correlation
from gn3.computations.rust_correlation import get_sample_corr_data
@@ -25,7 +27,7 @@ from wqflask.correlation.exceptions import WrongCorrelationType
def query_probes_metadata(dataset, trait_list):
"""query traits metadata in bulk for probeset"""
- if not bool(trait_list) or dataset.type!="ProbeSet":
+ if not bool(trait_list) or dataset.type != "ProbeSet":
return []
with database_connection(SQL_URI) as conn:
@@ -63,8 +65,11 @@ def get_metadata(dataset, traits):
if probe_mb:
return f"Chr{probe_chr}: {probe_mb:.6f}"
return f"Chr{probe_chr}: ???"
-
- return {trait_name: {
+ cached_metadata = read_trait_metadata(dataset.name)
+ to_fetch_metadata = list(
+ set(traits).difference(list(cached_metadata.keys())))
+ if to_fetch_metadata:
+ results = {**({trait_name: {
"name": trait_name,
"view": True,
"symbol": symbol,
@@ -77,13 +82,16 @@ def get_metadata(dataset, traits):
"location": __location__(probe_chr, probe_mb),
"chr": probe_chr,
"mb": probe_mb,
- "lrs_location":f'Chr{chr_score}: {mb:{".6f" if mb else ""}}',
+ "lrs_location": f'Chr{chr_score}: {mb:{".6f" if mb else ""}}',
"lrs_chr": chr_score,
"lrs_mb": mb
- } for trait_name, probe_chr, probe_mb, symbol, mean, description,
+ } for trait_name, probe_chr, probe_mb, symbol, mean, description,
additive, lrs, chr_score, mb
- in query_probes_metadata(dataset, traits)}
+ in query_probes_metadata(dataset, to_fetch_metadata)}), **cached_metadata}
+ cache_trait_metadata(dataset.name, results)
+ return results
+ return cached_metadata
def chunk_dataset(dataset, steps, name):
@@ -235,21 +243,20 @@ def __compute_sample_corr__(
"""Compute the sample correlations"""
(this_dataset, this_trait, target_dataset, sample_data) = target_trait_info
- if this_dataset.group.f1list !=None:
- this_dataset.group.samplelist+= this_dataset.group.f1list
+ if this_dataset.group.f1list != None:
+ this_dataset.group.samplelist += this_dataset.group.f1list
- if this_dataset.group.parlist!= None:
- this_dataset.group.samplelist+= this_dataset.group.parlist
+ if this_dataset.group.parlist != None:
+ this_dataset.group.samplelist += this_dataset.group.parlist
sample_data = get_sample_corr_data(
sample_type=start_vars["corr_samples_group"],
- sample_data= json.loads(start_vars["sample_vals"]),
+ sample_data=json.loads(start_vars["sample_vals"]),
dataset_samples=this_dataset.group.all_samples_ordered())
if not bool(sample_data):
return {}
-
if target_dataset.type == "ProbeSet" and start_vars.get("use_cache") == "true":
with database_connection(SQL_URI) as conn:
file_path = fetch_text_file(target_dataset.name, conn)
@@ -257,23 +264,18 @@ def __compute_sample_corr__(
(sample_vals, target_data) = read_text_file(
sample_data, file_path)
-
return run_correlation(target_data, sample_vals,
method, ",", corr_type, n_top)
-
write_db_to_textfile(target_dataset.name, conn)
file_path = fetch_text_file(target_dataset.name, conn)
if file_path:
(sample_vals, target_data) = read_text_file(
sample_data, file_path)
-
return run_correlation(target_data, sample_vals,
method, ",", corr_type, n_top)
-
-
target_dataset.get_trait_data(list(sample_data.keys()))
def __merge_key_and_values__(rows, current):
@@ -288,7 +290,6 @@ def __compute_sample_corr__(
if len(target_data) == 0:
return {}
-
return run_correlation(
target_data, list(sample_data.values()), method, ",", corr_type,
n_top)