aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexander_Kabui, 2022-08-31 01:43:33 +0300
committer: Alexander_Kabui, 2022-08-31 18:13:46 +0300
commit: 6652c3137ec477e02c8291ee6154f24cc3a4cef2 (patch)
tree: 4fa64211b6f501bfdc10a97a844885f7bd09a0b3
parent: 0718d987f1b15838d45a9c414ec6d2318ed65e90 (diff)
download: genenetwork2-6652c3137ec477e02c8291ee6154f24cc3a4cef2.tar.gz
read and parse text files
-rw-r--r-- wqflask/wqflask/correlation/pre_computes.py 44
1 file changed, 40 insertions, 4 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 95047fc7..eb089a03 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -4,6 +4,7 @@ import hashlib
from pathlib import Path
from base.data_set import query_table_timestamp
+from base.webqtlConfig import TEXTDIR
from base.webqtlConfig import TMPDIR
from json.decoder import JSONDecodeError
@@ -168,13 +169,48 @@ def get_datasets_data(base_dataset, target_dataset_data):
return (target_results, base_results)
-def fetch_text_file(dataset_name, text_dir, conn):
+
def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR):
    """Return the path of the text file with strain values for a dataset.

    Looks up ``dataset_name`` in the ``ProbeSetFreeze`` table and, when a
    row is found, searches ``text_dir`` for a file whose name starts with
    ``ProbeSetFreezeId_<Id>_``.

    Args:
        dataset_name: value matched against ``ProbeSetFreeze.Name``.
        conn: database connection whose ``cursor()`` works as a context
            manager.
        text_dir: directory to search; defaults to ``TEXTDIR``.

    Returns:
        The joined path of the first matching file, or ``None`` when the
        dataset is unknown, ``text_dir`` does not exist, or no file
        matches.
    """
    with conn.cursor() as cursor:
        # Bind the name as a query parameter instead of interpolating it
        # into the SQL text, so quotes in ``dataset_name`` cannot alter
        # the statement.
        # NOTE(review): assumes a DB-API driver using the ``format``
        # paramstyle (``%s``), e.g. MySQLdb -- confirm against ``conn``.
        cursor.execute(
            "SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s",
            (dataset_name,))
        results = cursor.fetchone()

    if not results:
        return None

    prefix = f"ProbeSetFreezeId_{results[0]}_"
    try:
        file_names = os.listdir(text_dir)
    except FileNotFoundError:
        # A missing text directory simply means no text file is available.
        return None

    for file_name in file_names:
        if file_name.startswith(prefix):
            return os.path.join(text_dir, file_name)
    return None
+
+
def read_text_file(sample_dict, file_path):
    """Read a quoted CSV text file and align its header with sample values.

    The first row of the file is treated as a header whose first column is
    skipped; every remaining header entry is looked up in ``sample_dict``.

    Args:
        sample_dict: mapping from header entry (strain name) to its value.
        file_path: path of the CSV text file to read.

    Returns:
        A tuple ``(sample_vals, lines)`` where ``sample_vals`` holds, for
        each header entry after the first, the value from ``sample_dict``
        or ``""`` when the entry is absent, and ``lines`` holds every
        remaining row with its fields unquoted and joined by ``","``.
    """
    # Local import keeps this block self-contained.
    import csv

    # ``csv.reader`` replaces the manual ``split('","')`` parsing, which
    # dropped the last two characters of every line and so corrupted the
    # final field on lines without a trailing newline or with CRLF endings.
    # ``newline=""`` is what the csv module expects of its input file.
    with open(file_path, "r", newline="") as file_handler:
        csv_reader = csv.reader(file_handler)
        header = next(csv_reader, [])  # an empty file yields no header

        # TODO: "" is a placeholder for strains missing from sample_dict.
        sample_vals = [
            sample_dict[strain] if strain in sample_dict else ""
            for strain in header[1:]
        ]

        return (sample_vals, [",".join(row) for row in csv_reader])