From d3a4146fd38fc1d372091cecadfcf7c8fb377f3b Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:27:05 +0000 Subject: Include code that processes rqtl output files and returns actual results instead of just the output filename --- gn3/api/rqtl.py | 16 +++++++++--- gn3/computations/rqtl.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index de620f7..0194b6f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,10 +1,13 @@ """Endpoints for running the rqtl cmd""" +import os + from flask import Blueprint from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd +from gn3.commands import run_cmd +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) @@ -35,10 +38,17 @@ run the rqtl_wrapper script and return the results as JSON if kwarg in boolean_kwargs: rqtl_bool_kwargs.append(kwarg) - results = generate_rqtl_cmd( + rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, rqtl_wrapper_bool_kwargs=boolean_kwargs ) - return jsonify(results) + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output = {} + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if int(rqtl_kwargs['nperm']) > 0: + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) + + return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 605e0e1..22d9faf 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,6 +1,11 @@ """Procedures related rqtl computations""" - +import os +import numpy as np from typing import Dict +from typing import List + +from flask import current_app + from gn3.commands import compose_rqtl_cmd from gn3.computations.gemma import generate_hash_of_string from gn3.fs_helpers import get_hash_of_files @@ -29,7 +34,9 @@ output filename generated from a hash of the genotype and phenotype files # Temporarily substitute forward-slashes in hash with underscores _hash = _hash.replace("/", "_") - _output_filename = f"{_hash}-output.json" + _output_filename = f"{_hash}-output.csv" + rqtl_wrapper_kwargs["filename"] = _output_filename + return { "output_file": _output_filename, @@ -38,3 +45,56 @@ output filename generated from a hash of the genotype and phenotype files rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) } + + +def process_rqtl_output(file_name: str) -> List: + """Given an output file name, read in R/qtl results and return + a List of marker objects + + """ + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", file_name), "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line + continue + else: + # Convert chr to int if possible + try: + the_chr = int(line_items[1][1:-1]) + except: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + + +def process_perm_output(file_name: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", "PERM_" + file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + else: + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant -- cgit v1.2.3