about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexander_Kabui, 2022-07-07 20:07:45 +0300
committer: BonfaceKilz, 2022-07-22 14:52:08 +0300
commit: b529c7003d01f216be5999a19950d39d611759fe (patch)
tree: 8c3a2a89fb1f360e1cac7928a97d70f62553c358
parent: e8be94e51942d63c05d5b6a229590a90ec620ac2 (diff)
download: genenetwork3-b529c7003d01f216be5999a19950d39d611759fe.tar.gz
minor fixes
-rw-r--r-- gn3/computations/rust_correlation.py 84
1 file changed, 73 insertions, 11 deletions
diff --git a/gn3/computations/rust_correlation.py b/gn3/computations/rust_correlation.py
index 380cff1..57c1b12 100644
--- a/gn3/computations/rust_correlation.py
+++ b/gn3/computations/rust_correlation.py
@@ -36,17 +36,21 @@ def generate_json_file(tmp_dir, tmp_file, method, delimiter, x_vals) -> str:
tmp_json_file = os.path.join(tmp_dir, f"{random_string(10)}.json")
+ output_file = os.path.join(tmp_dir, f"{random_string(10)}.txt")
+
correlation_args = {
"method": method,
"file_path": tmp_file,
"x_vals": x_vals,
+ "sample_values": "bxd1",
+ "output_file": output_file,
"file_delimiter": delimiter
}
with open(tmp_json_file, "w", encoding="utf-8") as outputfile:
json.dump(correlation_args, outputfile)
- return tmp_json_file
+ return (output_file, tmp_json_file)
def run_correlation(dataset, trait_vals:
@@ -57,31 +61,89 @@ def run_correlation(dataset, trait_vals:
(tmp_dir, tmp_file) = generate_input_files(dataset)
- json_file = generate_json_file(tmp_dir=tmp_dir, tmp_file=tmp_file,
- method=method, delimiter=delimiter,
- x_vals=trait_vals)
+ (output_file, json_file) = generate_json_file(tmp_dir=tmp_dir,
+ tmp_file=tmp_file,
+ method=method,
+ delimiter=delimiter,
+ x_vals=trait_vals)
command_list = [CORRELATION_COMMAND, json_file, TMPDIR]
- return subprocess.run(command_list, check=True)
+ rls = subprocess.run(command_list, check=True)
+
+ rs = parse_correlation_output(output_file,10000)
+
+ return rs
-def parse_correlation_output(result_file: str) -> list[dict]:
+def parse_correlation_output(result_file: str, top_n: int = 500) -> list[dict]:
"""parse file output """
corr_results = []
with open(result_file, "r", encoding="utf-8") as file_reader:
- for line in file_reader:
+ lines = [next(file_reader) for x in range(top_n)]
+
+ for line in lines:
(trait_name, corr_coeff, p_val) = line.rstrip().split(",")
corr_data = {
- "trait_name": trait_name,
- "corr_coeff": corr_coeff,
- "p_val": p_val
+ "num_overlap": 00, # to be later fixed
+ "corr_coefficient": corr_coeff,
+ "p_value": p_val
}
- corr_results.append(corr_data)
+ corr_results.append({trait_name: corr_data})
return corr_results
+
+
+
+
+# computation specific;sample_r,lit_corr
+def compute_top_n(first_run_results,init_type,dataset_1,dataset_2,dataset_type:str):
+ if dataset__type.lower()!= "probeset":
+ return first_run_results
+
+ if init_type == "sample":
+ # do both lit and tissue
+
+ results_a = run_correlation(dataset_1, x_vals_1,method,delimiter)
+
+ results_b = lit_correlation_for_trait(unkown)
+
+
+ # question how do we merge this
+
+
+
+
+
+ if init_type == "tissue":
+ # do sample and tissue
+
+
+ file_a = run_correlation(dataset_1,x_vals_1,method,delimiter)
+
+ result_b = lit_correlation_for_trait(unkown)
+
+ # merge the results
+
+
+
+ if init_type == "lit":
+
+ file_a = run_correlation()
+
+ file_b = run_correlation()
+
+ join <(file_a) <(file_b)
+
+ # do the merge here
+ # do both sample and tissue
+
+
+
+
+