From 9b628abcee86ef29e869b92a66b6034c1b63359d Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 20:11:15 +0000 Subject: Add rqtl.py for rqtl endpoints --- gn3/api/rqtl.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 gn3/api/rqtl.py (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py new file mode 100644 index 0000000..8dd4bb7 --- /dev/null +++ b/gn3/api/rqtl.py @@ -0,0 +1,19 @@ +import os + +from flask import Blueprint +from flask import current_app +from flask import request + +rqtl = Blueprint("rqtl", __name__) + +@rqtl.route("/compute", methods=["POST"]) +def compute(): + working_dir = os.path.join(current_app.config.get("TMPDIR")) + + genofile = request.form['geno_file'] + phenofile = request.form['pheno_file'] + + if not do_paths_exist([genofile, phenofile]): + raise FileNotFoundError + + return current_app.config.get("RQTL_WRAPPER_CMD") \ No newline at end of file -- cgit v1.2.3 From 7ed84670c0d13de38b578a4e4177b2529ff3fb40 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:54:42 +0000 Subject: Read in kwargs fromrequest and pass the command, tmpdir, and kwargs to generate_rqtl_cmd which returns the actual command and output path --- gn3/api/rqtl.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 8dd4bb7..82cf34f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -2,8 +2,12 @@ import os from flask import Blueprint from flask import current_app +from flask import jsonify from flask import request +from gn3.computations.rqtl import generate_rqtl_cmd +from gn3.computations.gemma import do_paths_exist + rqtl = Blueprint("rqtl", __name__) @rqtl.route("/compute", methods=["POST"]) @@ -16,4 +20,18 @@ def compute(): if not do_paths_exist([genofile, phenofile]): raise FileNotFoundError - return current_app.config.get("RQTL_WRAPPER_CMD") \ No newline at end of file + kwarg_list = ["addcovar", "model", "method", "interval", "nperm", "scale", "control_marker"] + + rqtl_kwargs = {"geno": genofile, "pheno": phenofile} + for kwarg in kwarg_list: + if kwarg in request.form: + rqtl_kwargs[kwarg] = request.form[kwarg] + + results = generate_rqtl_cmd( + rqtl_wrapper_cmd = current_app.config.get("RQTL_WRAPPER_CMD"), + output_dir = current_app.config.get('TMPDIR'), + rqtl_wrapper_kwargs = rqtl_kwargs + ) + + return jsonify(results) + -- cgit v1.2.3 From 0a8754a582f057bd335441eab15da3f629df9ad7 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 23:56:10 +0000 Subject: Fixed variety of issues detected by pylint --- gn3/api/rqtl.py | 13 ++++++------- gn3/computations/rqtl.py | 50 ++++++++++++++++++++++++------------------------ 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 82cf34f..7756310 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,5 +1,4 @@ -import os - +"""Endpoints for running the rqtl cmd""" from flask import Blueprint from flask import current_app from flask import jsonify @@ -12,8 +11,10 @@ rqtl = Blueprint("rqtl", __name__) @rqtl.route("/compute", methods=["POST"]) def compute(): - working_dir = os.path.join(current_app.config.get("TMPDIR")) + """Given at least a geno_file and pheno_file, generate and +run the rqtl_wrapper script and return the results as JSON + """ genofile = request.form['geno_file'] phenofile = request.form['pheno_file'] @@ -28,10 +29,8 @@ def compute(): rqtl_kwargs[kwarg] = request.form[kwarg] results = generate_rqtl_cmd( - rqtl_wrapper_cmd = current_app.config.get("RQTL_WRAPPER_CMD"), - output_dir = current_app.config.get('TMPDIR'), - rqtl_wrapper_kwargs = rqtl_kwargs + rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), + rqtl_wrapper_kwargs=rqtl_kwargs ) return jsonify(results) - diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 087a99f..0e8cd1f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,25 +1,25 @@ -"""Procedures related rqtl computations""" -import os - -from typing import Dict -from gn3.commands import compose_rqtl_cmd -from gn3.fs_helpers import get_hash_of_files - -def generate_rqtl_cmd(rqtl_wrapper_cmd: str, - output_dir: str, - rqtl_wrapper_kwargs: Dict) -> Dict: - - _hash = get_hash_of_files( - [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p", "addcovar", - "model", "method", - "interval", "nperm", - "scale", "control"]]) - - _output_filename = f"{_hash}-output.json" - return { - "output_file": - _output_filename, - "rqtl_cmd": - compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, - rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) - } \ No newline at end of file +"""Procedures related rqtl computations""" + +from typing import Dict +from gn3.commands import compose_rqtl_cmd +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict) -> Dict: + """Given the base rqtl_wrapper command and +dict of keyword arguments, return the full rqtl_wrapper command and an +output filename generated from a hash of the genotype and phenotype files + + """ + + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + + _output_filename = f"{_hash}-output.json" + return { + "output_file": + _output_filename, + "rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + } -- cgit v1.2.3 From 33d3a518e378c56b080a2a15b264ee5d031537e8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:42:27 +0000 Subject: Account for boolean kwargs in compute() by storing them in a list, since they don't have corresponding values --- gn3/api/rqtl.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 7756310..de620f7 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -21,16 +21,24 @@ run the rqtl_wrapper script and return the results as JSON if not do_paths_exist([genofile, phenofile]): raise FileNotFoundError - kwarg_list = ["addcovar", "model", "method", "interval", "nperm", "scale", "control_marker"] + # Split kwargs by those with values and boolean ones that just convert to True/False + kwargs = ["model", "method", "nperm", "scale", "control_marker"] + boolean_kwargs = ["addcovar", "interval"] + all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} - for kwarg in kwarg_list: + rqtl_bool_kwargs = [] + for kwarg in all_kwargs: if kwarg in request.form: - rqtl_kwargs[kwarg] = request.form[kwarg] + if kwarg in kwargs: + rqtl_kwargs[kwarg] = request.form[kwarg] + if kwarg in boolean_kwargs: + rqtl_bool_kwargs.append(kwarg) results = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), - rqtl_wrapper_kwargs=rqtl_kwargs + rqtl_wrapper_kwargs=rqtl_kwargs, + rqtl_wrapper_bool_kwargs=boolean_kwargs ) return jsonify(results) -- cgit v1.2.3 From d3a4146fd38fc1d372091cecadfcf7c8fb377f3b Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:27:05 +0000 Subject: Include code that processes rqtl output files and returns actual results instead of just the output filename --- gn3/api/rqtl.py | 16 +++++++++--- gn3/computations/rqtl.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index de620f7..0194b6f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,10 +1,13 @@ """Endpoints for running the rqtl cmd""" +import os + from flask import Blueprint from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd +from gn3.commands import run_cmd +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) @@ -35,10 +38,17 @@ run the rqtl_wrapper script and return the results as JSON if kwarg in boolean_kwargs: rqtl_bool_kwargs.append(kwarg) - results = generate_rqtl_cmd( + rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, rqtl_wrapper_bool_kwargs=boolean_kwargs ) - return jsonify(results) + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output = {} + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if int(rqtl_kwargs['nperm']) > 0: + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) + + return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 605e0e1..22d9faf 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,6 +1,11 @@ """Procedures related rqtl computations""" - +import os +import numpy as np from typing import Dict +from typing import List + +from flask import current_app + from gn3.commands import compose_rqtl_cmd from gn3.computations.gemma import generate_hash_of_string from gn3.fs_helpers import get_hash_of_files @@ -29,7 +34,9 @@ output filename generated from a hash of the genotype and phenotype files # Temporarily substitute forward-slashes in hash with underscores _hash = _hash.replace("/", "_") - _output_filename = f"{_hash}-output.json" + _output_filename = f"{_hash}-output.csv" + rqtl_wrapper_kwargs["filename"] = _output_filename + return { "output_file": _output_filename, @@ -38,3 +45,56 @@ output filename generated from a hash of the genotype and phenotype files rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) } + + +def process_rqtl_output(file_name: str) -> List: + """Given an output file name, read in R/qtl results and return + a List of marker objects + + """ + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", file_name), "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line + continue + else: + # Convert chr to int if possible + try: + the_chr = int(line_items[1][1:-1]) + except: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + + +def process_perm_output(file_name: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", "PERM_" + file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + else: + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant -- cgit v1.2.3 From 84790de8bcc51c00a92b71878088345cd58ccc51 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 28 May 2021 17:44:07 +0000 Subject: Fixed issue where all bool kwargs were always being passed to generate_rqtl_cmd and also made code check if output file already exists (so caching works) --- gn3/api/rqtl.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 0194b6f..38f4c1e 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -26,7 +26,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval"] + boolean_kwargs = ["addcovar", "interval", "pstrata"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -41,12 +41,15 @@ run the rqtl_wrapper script and return the results as JSON rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, - rqtl_wrapper_bool_kwargs=boolean_kwargs + rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs ) - os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output = {} + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From d42b85ae5fcea1b71a7165fd6e64745a228c48f9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 21:13:03 +0000 Subject: Fixed pylint issues --- gn3/api/rqtl.py | 7 ++++--- gn3/computations/rqtl.py | 45 ++++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 24 deletions(-) (limited to 'gn3/api') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 38f4c1e..ebb746c 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,6 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.commands import run_cmd from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist @@ -45,13 +44,15 @@ run the rqtl_wrapper script and return the results as JSON ) rqtl_output = {} - if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), + "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: - rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ + process_perm_output(rqtl_cmd.get('output_file')) return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 7b1a35c..0433b3f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,8 +1,9 @@ """Procedures related rqtl computations""" import os -import numpy as np from typing import Dict, List, Union +import numpy as np + from flask import current_app from gn3.commands import compose_rqtl_cmd @@ -54,27 +55,28 @@ def process_rqtl_output(file_name: str) -> List: marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] - with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: # Skip header line continue - else: - # Convert chr to int if possible - the_chr: Union[int, str] - try: - the_chr = int(line_items[1][1:-1]) - except: - the_chr = line_items[1][1:-1] - this_marker = { - "name": line_items[0][1:-1], - "chr": the_chr, - "cM": float(line_items[2]), - "Mb": float(line_items[2]), - "lod_score": float(line_items[3]) - } - marker_obs.append(this_marker) + + # Convert chr to int if possible + the_chr: Union[int, str] + try: + the_chr = int(line_items[1][1:-1]) + except ValueError: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) return marker_obs @@ -85,14 +87,15 @@ def process_perm_output(file_name: str): """ perm_results = [] - with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "PERM_" + file_name), "r") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line continue - else: - line_items = line.split(",") - perm_results.append(float(line_items[1])) + + line_items = line.split(",") + perm_results.append(float(line_items[1])) suggestive = np.percentile(np.array(perm_results), 67) significant = np.percentile(np.array(perm_results), 95) -- cgit v1.2.3