about summary refs log tree commit diff
path: root/gn3/computations
diff options
context:
space:
mode:
author Alexander_Kabui 2022-07-07 20:07:45 +0300
committer BonfaceKilz 2022-07-22 14:52:08 +0300
commit b529c7003d01f216be5999a19950d39d611759fe (patch)
tree 8c3a2a89fb1f360e1cac7928a97d70f62553c358 /gn3/computations
parent e8be94e51942d63c05d5b6a229590a90ec620ac2 (diff)
download genenetwork3-b529c7003d01f216be5999a19950d39d611759fe.tar.gz
minor fixes
Diffstat (limited to 'gn3/computations')
-rw-r--r-- gn3/computations/rust_correlation.py 84
1 files changed, 73 insertions, 11 deletions
diff --git a/gn3/computations/rust_correlation.py b/gn3/computations/rust_correlation.py
index 380cff1..57c1b12 100644
--- a/gn3/computations/rust_correlation.py
+++ b/gn3/computations/rust_correlation.py
@@ -36,17 +36,21 @@ def generate_json_file(tmp_dir, tmp_file, method, delimiter, x_vals) -> str:
 
     tmp_json_file = os.path.join(tmp_dir, f"{random_string(10)}.json")
 
+    output_file = os.path.join(tmp_dir, f"{random_string(10)}.txt")
+
     correlation_args = {
         "method": method,
         "file_path": tmp_file,
         "x_vals": x_vals,
+        "sample_values": "bxd1",
+        "output_file": output_file,
         "file_delimiter": delimiter
     }
 
     with open(tmp_json_file, "w", encoding="utf-8") as outputfile:
         json.dump(correlation_args, outputfile)
 
-    return tmp_json_file
+    return (output_file, tmp_json_file)
 
 
 def run_correlation(dataset, trait_vals:
@@ -57,31 +61,89 @@ def run_correlation(dataset, trait_vals:
 
     (tmp_dir, tmp_file) = generate_input_files(dataset)
 
-    json_file = generate_json_file(tmp_dir=tmp_dir, tmp_file=tmp_file,
-                                   method=method, delimiter=delimiter,
-                                   x_vals=trait_vals)
+    (output_file, json_file) = generate_json_file(tmp_dir=tmp_dir,
+                                                  tmp_file=tmp_file,
+                                                  method=method,
+                                                  delimiter=delimiter,
+                                                  x_vals=trait_vals)
 
     command_list = [CORRELATION_COMMAND, json_file, TMPDIR]
 
-    return subprocess.run(command_list, check=True)
+    rls = subprocess.run(command_list, check=True)
+
+    rs = parse_correlation_output(output_file,10000)
+
+    return rs
 
 
-def parse_correlation_output(result_file: str) -> list[dict]:
+def parse_correlation_output(result_file: str, top_n: int = 500) -> list[dict]:
     """parse file output """
 
     corr_results = []
 
     with open(result_file, "r", encoding="utf-8") as file_reader:
 
-        for line in file_reader:
+        lines = [next(file_reader) for x in range(top_n)]
+
+        for line in lines:
 
             (trait_name, corr_coeff, p_val) = line.rstrip().split(",")
             corr_data = {
-                "trait_name": trait_name,
-                "corr_coeff": corr_coeff,
-                "p_val": p_val
+                "num_overlap": 00,  # to be later fixed
+                "corr_coefficient": corr_coeff,
+                "p_value": p_val
             }
 
-            corr_results.append(corr_data)
+            corr_results.append({trait_name: corr_data})
 
     return corr_results
+
+
+
+
+# computation specific;sample_r,lit_corr
+def compute_top_n(first_run_results,init_type,dataset_1,dataset_2,dataset_type:str):
+    if dataset__type.lower()!= "probeset":
+        return first_run_results
+
+    if  init_type == "sample":
+        # do both lit and tissue
+
+        results_a = run_correlation(dataset_1, x_vals_1,method,delimiter)
+
+        results_b = lit_correlation_for_trait(unkown)
+
+
+        # question how do we merge this
+
+
+
+
+
+    if  init_type == "tissue":
+        # do sample and tissue
+
+
+        file_a  =  run_correlation(dataset_1,x_vals_1,method,delimiter)
+
+        result_b = lit_correlation_for_trait(unkown)
+
+        # merge the results
+
+
+
+    if  init_type == "lit":
+
+        file_a  = run_correlation()
+
+        file_b = run_correlation()
+
+        join <(file_a) <(file_b)
+
+    # do the merge here
+        # do both  sample and tissue
+
+
+
+
+