aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations/rust_correlation.py
diff options
context:
space:
mode:
author: Alexander Kabui, 2022-10-22 17:08:33 +0300
committer: GitHub, 2022-10-22 17:08:33 +0300
commit: 544eae96a21848fbf400fa65b3eca40c0fc8fb87 (patch)
tree: f7e24e082e3a2d3043f5c0ea4cbf2d33b73be64d /gn3/computations/rust_correlation.py
parent: 06da0390a1de5d0aa8eb6d7a0ed3120e350f8a0b (diff)
download: genenetwork3-544eae96a21848fbf400fa65b3eca40c0fc8fb87.tar.gz
fix issue;parsing sample data (#102)
Diffstat (limited to 'gn3/computations/rust_correlation.py')
-rw-r--r-- gn3/computations/rust_correlation.py | 14
1 files changed, 8 insertions, 6 deletions
diff --git a/gn3/computations/rust_correlation.py b/gn3/computations/rust_correlation.py
index dde6188..644c73b 100644
--- a/gn3/computations/rust_correlation.py
+++ b/gn3/computations/rust_correlation.py
@@ -48,12 +48,13 @@ def generate_json_file(
return (output_file, tmp_json_file)
+
def run_correlation(
dataset, trait_vals: str, method: str, delimiter: str,
corr_type: str = "sample", top_n: int = 500):
"""entry function to call rust correlation"""
- #pylint: disable=too-many-arguments
+ # pylint: disable=too-many-arguments
(tmp_dir, tmp_file) = generate_input_files(dataset)
(output_file, json_file) = generate_json_file(
tmp_dir=tmp_dir, tmp_file=tmp_file, method=method, delimiter=delimiter,
@@ -74,7 +75,7 @@ def run_correlation(
def parse_correlation_output(result_file: str,
corr_type: str, top_n: int = 500) -> dict:
"""parse file output """
- #current types are sample and tissue
+ # current types are sample and tissue
def __parse_line__(line):
(trait_name, corr_coeff, p_val, num_overlap) = line.rstrip().split(",")
if corr_type == "sample":
@@ -126,23 +127,24 @@ def get_samples(all_samples: dict[str, str],
def get_sample_corr_data(sample_type: str,
- all_samples: dict[str, str],
+ sample_data: dict[str, str],
+ all_samples: list[str],
dataset_samples: list[str]) -> dict[str, str]:
"""dependeing on the sample_type fetch the correct sample data """
if sample_type == "samples_primary":
- data = get_samples(all_samples=all_samples,
+ data = get_samples(all_samples=sample_data,
base_samples=dataset_samples, excluded=[])
elif sample_type == "samples_other":
data = get_samples(
- all_samples=all_samples,
+ all_samples=sample_data,
base_samples=[],
excluded=dataset_samples)
else:
data = get_samples(
- all_samples=all_samples, base_samples=[], excluded=[])
+ all_samples=sample_data, base_samples=all_samples, excluded=[])
return data