about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Alexander_Kabui, 2025-01-08 11:58:28 +0300
committer: Alexander_Kabui, 2025-01-08 11:58:28 +0300
commit: 69c92461d8821d373d2bbb7779ec63e85da2b6a0 (patch)
tree: 60587812abb7cdda93aa7e7ac3cbeef5b9116869
parent: 326004d76a317e5a1dfdb595bc0fe2f21c5b2f87 (diff)
download: genenetwork3-69c92461d8821d373d2bbb7779ec63e85da2b6a0.tar.gz
refactor: Pass arg commands to subprocess as a list.
* Use with open context for subprocess.
* Correctly handle subprocess error.
-rw-r--r-- gn3/api/rqtl.py 120
1 file changed, 77 insertions, 43 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index 1139d9e..ad918d8 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -1,4 +1,5 @@
 """Endpoints for running the rqtl cmd"""
+
 import os
 import uuid
 import subprocess
@@ -9,8 +10,12 @@ from flask import current_app
 from flask import jsonify
 from flask import request
 
-from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \
-                                  process_rqtl_pairscan, process_perm_output
+from gn3.computations.rqtl import (
+    generate_rqtl_cmd,
+    process_rqtl_mapping,
+    process_rqtl_pairscan,
+    process_perm_output,
+)
 from gn3.fs_helpers import assert_path_exists, get_tmpdir
 
 rqtl = Blueprint("rqtl", __name__)
@@ -19,17 +24,20 @@ rqtl = Blueprint("rqtl", __name__)
 @rqtl.route("/compute", methods=["POST"])
 def compute():
     """Given at least a geno_file and pheno_file, generate and
-run the rqtl_wrapper script and return the results as JSON
+    run the rqtl_wrapper script and return the results as JSON
 
     """
-    genofile = request.form['geno_file']
-    phenofile = request.form['pheno_file']
+    genofile = request.form["geno_file"]
+    phenofile = request.form["pheno_file"]
     assert_path_exists(genofile)
     assert_path_exists(phenofile)
 
     run_id = request.args.get("id")
-    with open(os.path.join(current_app.config.get("TMPDIR"),
-                           f"{run_id}.txt"), "w+", encoding="utf-8"):
+    with open(
+        os.path.join(current_app.config.get("TMPDIR"), f"{run_id}.txt"),
+        "w+",
+        encoding="utf-8",
+    ):
         # TODO thos should  be refactored
         pass
     # Split kwargs by those with values and boolean ones
@@ -38,7 +46,11 @@ run the rqtl_wrapper script and return the results as JSON
     boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"]
     all_kwargs = kwargs + boolean_kwargs
 
-    rqtl_kwargs = {"geno": genofile, "pheno": phenofile, "outdir": current_app.config.get("TMPDIR")}
+    rqtl_kwargs = {
+        "geno": genofile,
+        "pheno": phenofile,
+        "outdir": current_app.config.get("TMPDIR"),
+    }
     rqtl_bool_kwargs = []
 
     for kwarg in all_kwargs:
@@ -54,55 +66,77 @@ run the rqtl_wrapper script and return the results as JSON
 
     rqtl_cmd = generate_rqtl_cmd(
         rqtl_wrapper_cmd=str(
-            Path(__file__).absolute().parent.parent.parent.joinpath(
-                'scripts/rqtl_wrapper.R')),
+            Path(__file__)
+            .absolute()
+            .parent.parent.parent.joinpath("scripts/rqtl_wrapper.R")
+        ),
         rqtl_wrapper_kwargs=rqtl_kwargs,
-        rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs
+        rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs,
     )
 
     rqtl_output = {}
     #  get the stdout file
     run_id = request.args.get("id", str(uuid.uuid4()))
-    if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"),
-                                       "gn3", rqtl_cmd.get('output_file'))):
+    if not os.path.isfile(
+        os.path.join(
+            current_app.config.get("TMPDIR"), "gn3", rqtl_cmd.get("output_file")
+        )
+    ):
         pass
-    stream_ouput_file = os.path.join(current_app.config.get("TMPDIR"),
-                                         f"{run_id}.txt")
+    stream_ouput_file = os.path.join(current_app.config.get("TMPDIR"), f"{run_id}.txt")
 
-    run_process(rqtl_cmd.get("rqtl_cmd"), stream_ouput_file, run_id)
+    run_process(rqtl_cmd.get("rqtl_cmd").split(), stream_ouput_file, run_id)
 
     if "pairscan" in rqtl_bool_kwargs:
-        rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile)
+        rqtl_output["results"] = process_rqtl_pairscan(
+            rqtl_cmd.get("output_file"), genofile
+        )
     else:
-        rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file'))
+        rqtl_output["results"] = process_rqtl_mapping(rqtl_cmd.get("output_file"))
 
-    if int(rqtl_kwargs['nperm']) > 0:
+    if int(rqtl_kwargs["nperm"]) > 0:
         # pylint: disable=C0301
-        rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file'))
-
+        (
+            rqtl_output["perm_results"],
+            rqtl_output["suggestive"],
+            rqtl_output["significant"],
+        ) = process_perm_output(rqtl_cmd.get("output_file"))
     return jsonify(rqtl_output)
 
 
-def run_process(rscript_cmd, output_file, run_id):
-    """Main function to do the streaming"""
-    # TODO: move this function to own file
-    # pylint: disable=consider-using-with
-    process = subprocess.Popen(
-        rscript_cmd, shell=True,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT
-    )
-    for line in iter(process.stdout.readline, b""):
-        # these allow endpoint stream to read the file since
-        # no read and write file same tiem
-        with open(output_file, "a+", encoding="utf-8") as file_handler:
-            file_handler.write(line.decode("utf-8"))
-    process.stdout.close()
-    process.wait()
-    if process.returncode == 0:
-        return jsonify({"msg": "success",
-                        "results": "file_here",
+def run_process(cmd, output_file, run_id):
+    """Function to execute an external process and
+       capture the stdout in a file
+      input:
+           cmd: the command to execute as a list of args.
+           output_file: abs file path to write the stdout.
+           run_id: unique id to identify the process
+
+      output:
+          Dict with the results o either success or failure.
+    """
+    try:
+        # phase: execute the  rscript cmd
+        with subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+        ) as process:
+            for line in iter(process.stdout.readline, b""):
+                # phase: capture the stdout for eaching line allowing read and write
+                with open(output_file, "a+", encoding="utf-8") as file_handler:
+                    file_handler.write(line.decode("utf-8"))
+            process.wait()
+        if process.returncode == 0:
+            return jsonify({"msg": "success",
+                            "results": "file_here",
+                            "run_id": run_id})
+
+        return jsonify({"msg": "fail",
+                        "error": "Process failed",
+                        "run_id": run_id})
+
+    except subprocess.CalledProcessError as error:
+        return jsonify({"msg": "fail",
+                        "error": str(error),
                         "run_id": run_id})
-    return jsonify({"msg": "fail",
-                    "error": "Process failed",
-                    "run_id": run_id})