diff options
Diffstat (limited to 'gn3/computations')
| -rw-r--r-- | gn3/computations/rust_correlation.py | 84 | 
1 files changed, 73 insertions, 11 deletions
| diff --git a/gn3/computations/rust_correlation.py b/gn3/computations/rust_correlation.py index 380cff1..57c1b12 100644 --- a/gn3/computations/rust_correlation.py +++ b/gn3/computations/rust_correlation.py @@ -36,17 +36,21 @@ def generate_json_file(tmp_dir, tmp_file, method, delimiter, x_vals) -> str: tmp_json_file = os.path.join(tmp_dir, f"{random_string(10)}.json") + output_file = os.path.join(tmp_dir, f"{random_string(10)}.txt") + correlation_args = { "method": method, "file_path": tmp_file, "x_vals": x_vals, + "sample_values": "bxd1", + "output_file": output_file, "file_delimiter": delimiter } with open(tmp_json_file, "w", encoding="utf-8") as outputfile: json.dump(correlation_args, outputfile) - return tmp_json_file + return (output_file, tmp_json_file) def run_correlation(dataset, trait_vals: @@ -57,31 +61,89 @@ def run_correlation(dataset, trait_vals: (tmp_dir, tmp_file) = generate_input_files(dataset) - json_file = generate_json_file(tmp_dir=tmp_dir, tmp_file=tmp_file, - method=method, delimiter=delimiter, - x_vals=trait_vals) + (output_file, json_file) = generate_json_file(tmp_dir=tmp_dir, + tmp_file=tmp_file, + method=method, + delimiter=delimiter, + x_vals=trait_vals) command_list = [CORRELATION_COMMAND, json_file, TMPDIR] - return subprocess.run(command_list, check=True) + rls = subprocess.run(command_list, check=True) + + rs = parse_correlation_output(output_file,10000) + + return rs -def parse_correlation_output(result_file: str) -> list[dict]: +def parse_correlation_output(result_file: str, top_n: int = 500) -> list[dict]: """parse file output """ corr_results = [] with open(result_file, "r", encoding="utf-8") as file_reader: - for line in file_reader: + lines = [next(file_reader) for x in range(top_n)] + + for line in lines: (trait_name, corr_coeff, p_val) = line.rstrip().split(",") corr_data = { - "trait_name": trait_name, - "corr_coeff": corr_coeff, - "p_val": p_val + "num_overlap": 00, # to be later fixed + "corr_coefficient": corr_coeff, + "p_value": p_val } - corr_results.append(corr_data) + corr_results.append({trait_name: corr_data}) return corr_results + + + + +# computation specific;sample_r,lit_corr +def compute_top_n(first_run_results,init_type,dataset_1,dataset_2,dataset_type:str): + if dataset__type.lower()!= "probeset": + return first_run_results + + if init_type == "sample": + # do both lit and tissue + + results_a = run_correlation(dataset_1, x_vals_1,method,delimiter) + + results_b = lit_correlation_for_trait(unkown) + + + # question how do we merge this + + + + + + if init_type == "tissue": + # do sample and tissue + + + file_a = run_correlation(dataset_1,x_vals_1,method,delimiter) + + result_b = lit_correlation_for_trait(unkown) + + # merge the results + + + + if init_type == "lit": + + file_a = run_correlation() + + file_b = run_correlation() + + join <(file_a) <(file_b) + + # do the merge here + # do both sample and tissue + + + + + | 
