diff options
author | Frederick Muriuki Muriithi | 2022-09-20 21:24:53 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-09-20 21:24:53 +0300 |
commit | 3a8d99868cbc03e5ad6edced016504ed549ef468 (patch) | |
tree | 2de48ac76b4e1a25e04246f51ff14e29f4db2d69 | |
parent | 8f732461b897a7c229c3b49a74fd831c2e440989 (diff) | |
parent | 3458808a65d5e55644ea23aa00982973230ac556 (diff) | |
download | genenetwork2-3a8d99868cbc03e5ad6edced016504ed549ef468.tar.gz |
Merge branch 'Alexanderlacuna-feature/generate-text-files' into testing
-rw-r--r-- | wqflask/wqflask/correlation/pre_computes.py | 59 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/rust_correlation.py | 15 |
2 files changed, 69 insertions, 5 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index 1c52a0f5..afcea88f 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -171,11 +171,12 @@ def get_datasets_data(base_dataset, target_dataset_data): return (target_results, base_results) -def fetch_text_file(dataset_name, conn, text_dir=TEXTDIR): +def fetch_text_file(dataset_name, conn, text_dir=TMPDIR): """fetch textfiles with strain vals if exists""" with conn.cursor() as cursor: - cursor.execute('SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,)) + cursor.execute( + 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (dataset_name,)) results = cursor.fetchone() if results: try: @@ -204,3 +205,57 @@ def read_text_file(sample_dict, file_path): _posit, sample_vals = __fetch_id_positions__( next(csv_reader)[1:], sample_dict) return (sample_vals, [",".join([line[i] for i in _posit]) for line in csv_reader]) + + +def write_db_to_textfile(db_name, conn, text_dir=TMPDIR): + + def __generate_file_name__(db_name): + # todo add expiry time and checker + with conn.cursor() as cursor: + cursor.execute( + 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = %s', (db_name,)) + results = cursor.fetchone() + if (results): + return f"ProbeSetFreezeId_{results[0]}_{results[1]}" + + def __parse_to_dict__(results): + ids = ["ID"] + data = {} + for (trait, strain, val) in results: + if strain not in ids: + ids.append(strain) + if trait in data: + data[trait].append(val) + else: + data[trait] = [trait, val] + return (data, ids) + + def __write_to_file__(file_path, data, col_names): + with open(file_path, 'w+', encoding='UTF8') as file_handler: + + writer = csv.writer(file_handler) + writer.writerow(col_names) + writer.writerows(data.values()) + with conn.cursor() as cursor: + cursor.execute( + "SELECT ProbeSet.Name,Strain.Name, ProbeSetData.value " + "FROM (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, " + "ProbeSetXRef) LEFT JOIN ProbeSetSE ON " + "(ProbeSetSE.DataId = ProbeSetData.Id AND " + "ProbeSetSE.StrainId = ProbeSetData.StrainId) " + "LEFT JOIN NStrain ON " + "(NStrain.DataId = ProbeSetData.Id AND " + "NStrain.StrainId = ProbeSetData.StrainId) " + "WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s AND " + "ProbeSetXRef.DataId = ProbeSetData.Id " + "AND ProbeSetData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (db_name,)) + results = cursor.fetchall() + file_name = __generate_file_name__( + db_name) + if (results and file_name): + __write_to_file__(os.path.join(text_dir, file_name), + *__parse_to_dict__(results)) diff --git a/wqflask/wqflask/correlation/rust_correlation.py b/wqflask/wqflask/correlation/rust_correlation.py index 5b39c871..d9193459 100644 --- a/wqflask/wqflask/correlation/rust_correlation.py +++ b/wqflask/wqflask/correlation/rust_correlation.py @@ -10,6 +10,7 @@ from wqflask.correlation.correlation_gn3_api import lit_for_trait_list from wqflask.correlation.correlation_gn3_api import do_lit_correlation from wqflask.correlation.pre_computes import fetch_text_file from wqflask.correlation.pre_computes import read_text_file +from wqflask.correlation.pre_computes import write_db_to_textfile from gn3.computations.correlations import compute_all_lit_correlation from gn3.computations.rust_correlation import run_correlation from gn3.computations.rust_correlation import get_sample_corr_data @@ -195,7 +196,7 @@ def compute_top_n_tissue(this_dataset, this_trait, traits, method): symbol_dict=get_trait_symbol_and_tissue_values( symbol_list=[this_trait.symbol]), dataset_symbols=trait_symbol_dict, - dataset_vals=corr_result_tissue_vals_dict) + dataset_vals=corr_result_tissue_vals_dict) if data and data[0]: return run_correlation( @@ -237,7 +238,15 @@ def __compute_sample_corr__( if file_path: (sample_vals, target_data) = read_text_file( sample_data, file_path) - return run_correlation(target_data, sample_vals, method, ",", corr_type, n_top) + return run_correlation(target_data, sample_vals, + method, ",", corr_type, n_top) + write_db_to_textfile(target_dataset.name, conn) + file_path = fetch_text_file(target_dataset.name, conn) + if file_path: + (sample_vals, target_data) = read_text_file( + sample_data, file_path) + return run_correlation(target_data, sample_vals, + method, ",", corr_type, n_top) target_dataset.get_trait_data(list(sample_data.keys())) @@ -248,7 +257,7 @@ def __compute_sample_corr__( target_data.append(r) if len(target_data) == 0: - return {} + return {} return run_correlation( target_data, list(sample_data.values()), method, ",", corr_type, |