From 644100ea2e5e407eb06ccb292d85dd20b104beee Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 1 Apr 2024 07:24:46 -0500 Subject: Getting Rqtl to run on fallback --- gn3/api/general.py | 2 +- gn3/api/rqtl.py | 5 +++-- gn3/commands.py | 2 ++ gn3/computations/rqtl.py | 8 +++++++- gn3/debug.py | 5 ++++- gn3/fs_helpers.py | 15 ++++++++++----- gn3/settings.py | 2 -- scripts/rqtl_wrapper.R | 8 +++++++- 8 files changed, 34 insertions(+), 13 deletions(-) diff --git a/gn3/api/general.py b/gn3/api/general.py index 69ec343..891f992 100644 --- a/gn3/api/general.py +++ b/gn3/api/general.py @@ -64,7 +64,7 @@ def run_r_qtl(geno_filestr, pheno_filestr): """Run r_qtl command using the written rqtl_wrapper program """ - rqtl_wrapper = current_app.config["RQTL_WRAPPER"] + rqtl_wrapper = 'scripts/rqtl_wrapper.R' cmd = (f"Rscript {rqtl_wrapper} " f"{geno_filestr} {pheno_filestr}") return jsonify(run_cmd(cmd)), 201 diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 3893275..36bc3da 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,6 +6,7 @@ from flask import current_app from flask import jsonify from flask import request +from gn3.debug import __pk__ from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \ process_rqtl_pairscan, process_perm_output from gn3.fs_helpers import assert_paths_exist @@ -38,7 +39,7 @@ run the rqtl_wrapper script and return the results as JSON rqtl_bool_kwargs.append(kwarg) rqtl_cmd = generate_rqtl_cmd( - rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), + rqtl_wrapper_cmd='scripts/rqtl_wrapper.R', rqtl_wrapper_kwargs=rqtl_kwargs, rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs ) @@ -46,7 +47,7 @@ run the rqtl_wrapper script and return the results as JSON rqtl_output = {} if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): - os.system(rqtl_cmd.get('rqtl_cmd')) + os.system(__pk__(rqtl_cmd.get('rqtl_cmd'))) if "pairscan" in rqtl_bool_kwargs: rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) diff --git a/gn3/commands.py b/gn3/commands.py index 79e1e7e..7a9ba67 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -154,12 +154,14 @@ def run_cmd(cmd: str, success_codes: Tuple = (0,), env: Optional[str] = None) -> """Run CMD and return the CMD's status code and output as a dict""" parsed_cmd = json.loads(cmd) parsed_env = (json.loads(env) if env is not None else None) + results = subprocess.run( parsed_cmd, capture_output=True, shell=isinstance(parsed_cmd, str), check=False, env=parsed_env) out = str(results.stdout, 'utf-8') if results.returncode not in success_codes: # Error! out = str(results.stderr, 'utf-8') + current_app.logger.debug(out) return {"code": results.returncode, "output": out} def run_async_cmd( diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index f082482..8b1b316 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,5 +1,7 @@ """Procedures related to R/qtl computations""" import os +import sys +import logging from bisect import bisect from typing import Dict, List, Tuple, Union @@ -9,8 +11,9 @@ from flask import current_app from gn3.commands import compose_rqtl_cmd from gn3.computations.gemma import generate_hash_of_string -from gn3.fs_helpers import get_hash_of_files +from gn3.fs_helpers import get_hash_of_files, assert_path_exists +from gn3.debug import __pk__ def generate_rqtl_cmd( rqtl_wrapper_cmd: str, @@ -21,6 +24,8 @@ def generate_rqtl_cmd( dict of keyword arguments, return the full rqtl_wrapper command and an output filename generated from a hash of the genotype and phenotype files""" + assert_path_exists(rqtl_wrapper_cmd) + # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]] @@ -60,6 +65,7 @@ def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects""" marker_obs = [] + # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] with open( diff --git a/gn3/debug.py b/gn3/debug.py index ccfcba1..acc2402 100644 --- a/gn3/debug.py +++ b/gn3/debug.py @@ -1,9 +1,12 @@ """Debug utilities""" import logging +from flask import current_app + logger = logging.getLogger(__name__) def __pk__(*args): value = args[-1] title_vals = " => ".join(args[0:-1]) - logger.debug("%s: %s", title_vals, value) + current_app.logger.setLevel(logging.DEBUG) # Force debug level since we assume we are using it! + current_app.logger.debug("%s: %s", title_vals, value) return value diff --git a/gn3/fs_helpers.py b/gn3/fs_helpers.py index 74e7ada..845c48b 100644 --- a/gn3/fs_helpers.py +++ b/gn3/fs_helpers.py @@ -13,14 +13,19 @@ from typing import List from typing import ValuesView from werkzeug.utils import secure_filename +def assert_path_exists(path: str, throw_error: bool = True) -> bool: + """Throw error if any of them do not exist.""" + if not os.path.isfile(path): + if throw_error: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) + else: + return False + return True + def assert_paths_exist(paths: ValuesView, throw_error: bool = True) -> bool: """Given a list of PATHS, throw error if any of them do not exist.""" for path in paths: - if not os.path.isfile(path): - if throw_error: - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) - else: - return False + assert_path_exists(path,throw_error) return True def get_hash_of_files(files: List[str]) -> str: diff --git a/gn3/settings.py b/gn3/settings.py index 1e794ff..04aa129 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -11,12 +11,10 @@ import tempfile BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle" # Change this! DATA_DIR = "" GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper") -RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER") CACHEDIR = "" REDIS_URI = "redis://localhost:6379/0" REDIS_JOB_QUEUE = "GN3::job-queue" TMPDIR = os.environ.get("TMPDIR", tempfile.gettempdir()) -RQTL_WRAPPER = "rqtl_wrapper.R" # SPARQL endpoint SPARQL_ENDPOINT = os.environ.get( diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 2ac8faa..0d1f2ff 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -3,6 +3,12 @@ library(qtl) library(stringi) library(stringr) + +tmp_dir = Sys.getenv("TMPDIR") +if (!dir.exists(tmp_dir)) { + tmp_dir = "/tmp" +} + option_list = list( make_option(c("-g", "--geno"), type="character", help=".geno file containing a dataset's genotypes"), make_option(c("-p", "--pheno"), type="character", help="File containing two columns - sample names and values"), @@ -56,7 +62,7 @@ geno_file = opt$geno pheno_file = opt$pheno # Generate randomized filename for cross object -cross_file = file.path(opt$outdir, "cross", paste(stri_rand_strings(1, 8), ".cross", sep = "")) +cross_file = file.path(tmp_dir, "output", paste(stri_rand_strings(1, 8), ".cross", sep = "")) trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } -- cgit v1.2.3