 wqflask/wqflask/correlation/pre_computes.py | 92 ++++++++++++++++++++++++---
 1 file changed, 89 insertions(+), 3 deletions(-)
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 720eab09..7a1690dd 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -7,11 +7,13 @@ import datetime
import lmdb
import pickle
from pathlib import Path
-
+from gn3.db_utils import database_connection
from base.data_set import query_table_timestamp
from base.webqtlConfig import TEXTDIR
from base.webqtlConfig import TMPDIR
+from utility.tools import SQL_URI
+
from json.decoder import JSONDecodeError
def cache_trait_metadata(dataset_name, data):
@@ -30,8 +32,7 @@ def cache_trait_metadata(dataset_name, data):
def read_trait_metadata(dataset_name,dataset_type):
try:
- with lmdb.open(os.path.join("/tmp/",f"metadata_{dataset_type}"),
- readonly=True, lock=False) as env:
+ with lmdb.open(os.path.join("/tmp/",f"metadata_{dataset_type}"), readonly=True, lock=False) as env:
with env.begin() as txn:
metadata = txn.get(dataset_name.encode())
return (pickle.loads(metadata)["data"] if metadata else {})
@@ -39,6 +40,88 @@ def read_trait_metadata(dataset_name,dataset_type):
return {}
+
+def parse_lmdb_dataset(strain_names, target_strains, data):
+    # filter the cached dataset down to the strains shared with
+    # target_strains, keeping column 0 (the trait name) in every row
+    def __fetch_id_positions__(all_ids, target_ids):
+        _vals = []
+        _posit = [0]  # position 0 holds the trait name column
+
+        for (idx, strain) in enumerate(all_ids, 1):
+            if strain in target_ids:
+                _vals.append(target_ids[strain])
+                _posit.append(idx)
+
+        return (_posit, _vals)
+
+    _posit, sample_vals = __fetch_id_positions__(strain_names, target_strains)
+    return (sample_vals, [[line[i] for i in _posit] for line in data.values()])
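+# Example with hypothetical values, showing the expected shapes:
+#   parse_lmdb_dataset(["BXD1", "BXD2"], {"BXD1": "12.1"},
+#                      {"1427571_at": ["1427571_at", "9.3", "8.7"]})
+#   returns (["12.1"], [["1427571_at", "9.3"]])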
+
+def read_lmdb_strain_files(dataset_type, dataset_name, sql_uri=SQL_URI):
+    # the dataset name is resolved to a ProbeSetFreeze id, which is used
+    # to build the lmdb key the strain file was cached under
+
+    def __sanitise_filename__(filename):
+        ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
+        return str.translate(filename, ttable)
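+        # e.g. __sanitise_filename__("a b/c") -> "a_b_c"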
+
+    def __generate_file_name__(db_name):
+        # TODO: add an expiry time and a staleness check for the cache
+        with database_connection(sql_uri) as conn:
+            with conn.cursor() as cursor:
+                cursor.execute(
+                    "SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s",
+                    (db_name,))
+                results = cursor.fetchone()
+                if results:
+                    return __sanitise_filename__(
+                        f"ProbeSetFreezeId_{results[0]}_{results[1]}")
+ """
+
+ def __fetch_id_positions__(all_ids, target_ids):
+ _vals = []
+ _posit = [0] # alternative for parsing
+
+ for (idx, strain) in enumerate(all_ids, 1):
+ if strain in target_ids:
+ _vals.append(target_ids[strain])
+ _posit.append(idx)
+
+ return (_posit, _vals)
+
+ with open(file_path) as csv_file:
+ csv_reader = csv.reader(csv_file, delimiter=',')
+ _posit, sample_vals = __fetch_id_positions__(
+ next(csv_reader)[1:], sample_dict)
+ return (sample_vals, [[line[i] for i in _posit] for line in csv_reader])
+
+
+ """
+
+
+    try:
+        with lmdb.open(os.path.join("/tmp", "Probesets"),
+                       readonly=True, lock=False) as env:
+            with env.begin() as txn:
+                filename = __generate_file_name__(dataset_name)
+                if filename:
+                    data = txn.get(filename.encode())
+                    if data:
+                        # the cached pickle is assumed to hold
+                        # {"strain_names": [...], "data": {...}}
+                        dataset = pickle.loads(data)
+                        return (dataset["strain_names"], dataset["data"])
+        return {}
+    except Exception:
+        # treat any cache error as a miss rather than failing the request
+        return {}
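+
+# A minimal usage sketch (hypothetical dataset name and sample dict,
+# assuming the Probesets lmdb cache has already been populated):
+#
+#   strain_names, data = read_lmdb_strain_files("ProbeSet", "HC_M2_0606_P")
+#   sample_vals, trait_rows = parse_lmdb_dataset(
+#       strain_names, {"BXD1": "9.8"}, data)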
+
def fetch_all_cached_metadata(dataset_name):
"""in a gvein dataset fetch all the traits metadata"""
file_name = generate_filename(dataset_name, suffix="metadata")
@@ -105,6 +188,9 @@ def fetch_text_file(dataset_name, conn, text_dir=TMPDIR):
pass
+
+
def read_text_file(sample_dict, file_path):
def __fetch_id_positions__(all_ids, target_ids):