aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gn3/api/rqtl.py58
-rw-r--r--gn3/app.py2
-rw-r--r--gn3/commands.py14
-rw-r--r--gn3/computations/correlations.py26
-rw-r--r--gn3/computations/rqtl.py103
-rw-r--r--gn3/settings.py1
-rw-r--r--mypy.ini3
-rw-r--r--scripts/rqtl_wrapper.R229
-rw-r--r--tests/unit/computations/test_rqtl.py42
-rw-r--r--tests/unit/test_commands.py25
10 files changed, 490 insertions, 13 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
new file mode 100644
index 0000000..ebb746c
--- /dev/null
+++ b/gn3/api/rqtl.py
@@ -0,0 +1,58 @@
+"""Endpoints for running the rqtl cmd"""
+import os
+
+from flask import Blueprint
+from flask import current_app
+from flask import jsonify
+from flask import request
+
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.gemma import do_paths_exist
+
+rqtl = Blueprint("rqtl", __name__)
+
+@rqtl.route("/compute", methods=["POST"])
+def compute():
+ """Given at least a geno_file and pheno_file, generate and
+run the rqtl_wrapper script and return the results as JSON
+
+ """
+ genofile = request.form['geno_file']
+ phenofile = request.form['pheno_file']
+
+ if not do_paths_exist([genofile, phenofile]):
+ raise FileNotFoundError
+
+ # Split kwargs by those with values and boolean ones that just convert to True/False
+ kwargs = ["model", "method", "nperm", "scale", "control_marker"]
+ boolean_kwargs = ["addcovar", "interval", "pstrata"]
+ all_kwargs = kwargs + boolean_kwargs
+
+ rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
+ rqtl_bool_kwargs = []
+ for kwarg in all_kwargs:
+ if kwarg in request.form:
+ if kwarg in kwargs:
+ rqtl_kwargs[kwarg] = request.form[kwarg]
+ if kwarg in boolean_kwargs:
+ rqtl_bool_kwargs.append(kwarg)
+
+ rqtl_cmd = generate_rqtl_cmd(
+ rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"),
+ rqtl_wrapper_kwargs=rqtl_kwargs,
+ rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs
+ )
+
+ rqtl_output = {}
+ if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"),
+ "output", rqtl_cmd.get('output_file'))):
+ os.system(rqtl_cmd.get('rqtl_cmd'))
+
+ rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+
+ rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+ if int(rqtl_kwargs['nperm']) > 0:
+ rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
+ process_perm_output(rqtl_cmd.get('output_file'))
+
+ return jsonify(rqtl_output)
diff --git a/gn3/app.py b/gn3/app.py
index dc89f55..046b5de 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -5,6 +5,7 @@ from typing import Dict
from typing import Union
from flask import Flask
from gn3.api.gemma import gemma
+from gn3.api.rqtl import rqtl
from gn3.api.general import general
from gn3.api.correlation import correlation
from gn3.api.data_entry import data_entry
@@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
app.config.from_pyfile(config)
app.register_blueprint(general, url_prefix="/api/")
app.register_blueprint(gemma, url_prefix="/api/gemma")
+ app.register_blueprint(rqtl, url_prefix="/api/rqtl")
app.register_blueprint(correlation, url_prefix="/api/correlation")
app.register_blueprint(data_entry, url_prefix="/api/dataentry")
return app
diff --git a/gn3/commands.py b/gn3/commands.py
index 4b0d62d..14bd295 100644
--- a/gn3/commands.py
+++ b/gn3/commands.py
@@ -30,6 +30,20 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper",
cmd += " ".join([f"{arg}" for arg in gemma_args])
return cmd
+def compose_rqtl_cmd(rqtl_wrapper_cmd: str,
+ rqtl_wrapper_kwargs: Dict,
+ rqtl_wrapper_bool_kwargs: list) -> str:
+ """Compose a valid R/qtl command given the correct input"""
+ # Add kwargs with values
+ cmd = f"Rscript { rqtl_wrapper_cmd } " + " ".join(
+ [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()])
+
+ # Add boolean kwargs (kwargs that are either on or off, like --interval)
+ if rqtl_wrapper_bool_kwargs:
+ cmd += " "
+ cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs])
+
+ return cmd
def queue_cmd(conn: Redis,
job_queue: str,
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index eae7ae4..bc738a7 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -69,8 +69,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
"spearman": scipy.stats.spearmanr
}
use_corr_method = corr_mapping.get(corr_method, "spearman")
- corr_coeffient, p_val = use_corr_method(primary_values, target_values)
- return (corr_coeffient, p_val)
+ corr_coefficient, p_val = use_corr_method(primary_values, target_values)
+ return (corr_coefficient, p_val)
def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
@@ -85,13 +85,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
if num_overlap > 5:
- (corr_coeffient, p_value) =\
+ (corr_coefficient, p_value) =\
compute_corr_coeff_p_value(primary_values=sanitized_traits_vals,
target_values=sanitized_target_vals,
corr_method=corr_method)
- if corr_coeffient is not None:
- return (trait_name, corr_coeffient, p_value, num_overlap)
+ if corr_coefficient is not None:
+ return (trait_name, corr_coefficient, p_value, num_overlap)
return None
@@ -145,10 +145,10 @@ def compute_all_sample_correlation(this_trait,
for sample_correlation in results:
if sample_correlation is not None:
- (trait_name, corr_coeffient, p_value,
+ (trait_name, corr_coefficient, p_value,
num_overlap) = sample_correlation
corr_result = {
- "corr_coeffient": corr_coeffient,
+ "corr_coefficient": corr_coefficient,
"p_value": p_value,
"num_overlap": num_overlap
}
@@ -156,7 +156,7 @@ def compute_all_sample_correlation(this_trait,
corr_results.append({trait_name: corr_result})
return sorted(
corr_results,
- key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"]))
+ key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
def benchmark_compute_all_sample(this_trait,
@@ -179,12 +179,12 @@ def benchmark_compute_all_sample(this_trait,
trait_vals=this_vals,
target_samples_vals=target_vals)
if sample_correlation is not None:
- (trait_name, corr_coeffient,
+ (trait_name, corr_coefficient,
p_value, num_overlap) = sample_correlation
else:
continue
corr_result = {
- "corr_coeffient": corr_coeffient,
+ "corr_coefficient": corr_coefficient,
"p_value": p_value,
"num_overlap": num_overlap
}
@@ -200,20 +200,20 @@ def tissue_correlation_for_trait(
compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict:
"""Given a primary tissue values for a trait and the target tissues values
compute the correlation_cooeff and p value the input required are arrays
- output -> List containing Dicts with corr_coefficient value,P_value and
+ output -> List containing Dicts with corr_coefficient value, P_value and
also the tissue numbers is len(primary) == len(target)
"""
# ax :todo assertion that length one one target tissue ==primary_tissue
- (tissue_corr_coeffient,
+ (tissue_corr_coefficient,
p_value) = compute_corr_p_value(primary_values=primary_tissue_vals,
target_values=target_tissues_values,
corr_method=corr_method)
tiss_corr_result = {trait_id: {
- "tissue_corr": tissue_corr_coeffient,
+ "tissue_corr": tissue_corr_coefficient,
"tissue_number": len(primary_tissue_vals),
"tissue_p_val": p_value}}
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
new file mode 100644
index 0000000..0433b3f
--- /dev/null
+++ b/gn3/computations/rqtl.py
@@ -0,0 +1,103 @@
+"""Procedures related rqtl computations"""
+import os
+from typing import Dict, List, Union
+
+import numpy as np
+
+from flask import current_app
+
+from gn3.commands import compose_rqtl_cmd
+from gn3.computations.gemma import generate_hash_of_string
+from gn3.fs_helpers import get_hash_of_files
+
+def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
+ rqtl_wrapper_kwargs: Dict,
+ rqtl_wrapper_bool_kwargs: list) -> Dict:
+ """Given the base rqtl_wrapper command and
+dict of keyword arguments, return the full rqtl_wrapper command and an
+output filename generated from a hash of the genotype and phenotype files
+
+ """
+
+ # Generate a hash from contents of the genotype and phenotype files
+ _hash = get_hash_of_files(
+ [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]])
+
+ # Append to hash a hash of keyword arguments
+ _hash += generate_hash_of_string(
+ ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]]))
+
+ # Append to hash a hash of boolean keyword arguments
+ _hash += generate_hash_of_string(
+ ",".join(rqtl_wrapper_bool_kwargs))
+
+ # Temporarily substitute forward-slashes in hash with underscores
+ _hash = _hash.replace("/", "_")
+
+ _output_filename = f"{_hash}-output.csv"
+ rqtl_wrapper_kwargs["filename"] = _output_filename
+
+ return {
+ "output_file":
+ _output_filename,
+ "rqtl_cmd":
+ compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd,
+ rqtl_wrapper_kwargs=rqtl_wrapper_kwargs,
+ rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs)
+ }
+
+
+def process_rqtl_output(file_name: str) -> List:
+ """Given an output file name, read in R/qtl results and return
+ a List of marker objects
+
+ """
+ marker_obs = []
+ # Later I should probably redo this using csv.read to avoid the
+ # awkwardness with removing quotes with [1:-1]
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", file_name), "r") as the_file:
+ for line in the_file:
+ line_items = line.split(",")
+ if line_items[1][1:-1] == "chr" or not line_items:
+ # Skip header line
+ continue
+
+ # Convert chr to int if possible
+ the_chr: Union[int, str]
+ try:
+ the_chr = int(line_items[1][1:-1])
+ except ValueError:
+ the_chr = line_items[1][1:-1]
+ this_marker = {
+ "name": line_items[0][1:-1],
+ "chr": the_chr,
+ "cM": float(line_items[2]),
+ "Mb": float(line_items[2]),
+ "lod_score": float(line_items[3])
+ }
+ marker_obs.append(this_marker)
+
+ return marker_obs
+
+
+def process_perm_output(file_name: str):
+ """Given base filename, read in R/qtl permutation output and calculate
+ suggestive and significant thresholds
+
+ """
+ perm_results = []
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", "PERM_" + file_name), "r") as the_file:
+ for i, line in enumerate(the_file):
+ if i == 0:
+ # Skip header line
+ continue
+
+ line_items = line.split(",")
+ perm_results.append(float(line_items[1]))
+
+ suggestive = np.percentile(np.array(perm_results), 67)
+ significant = np.percentile(np.array(perm_results), 95)
+
+ return perm_results, suggestive, significant
diff --git a/gn3/settings.py b/gn3/settings.py
index bde856d..770ba3d 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -6,6 +6,7 @@ import os
BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle" # Change this!
DATA_DIR = ""
GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper")
+RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER")
CACHEDIR = ""
REDIS_URI = "redis://localhost:6379/0"
REDIS_JOB_QUEUE = "GN3::job-queue"
diff --git a/mypy.ini b/mypy.ini
index 5e1af13..5d66812 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -3,6 +3,9 @@
[mypy-scipy.*]
ignore_missing_imports = True
+[mypy-numpy.*]
+ignore_missing_imports = True
+
[mypy-MySQLdb.*]
ignore_missing_imports = True
diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
new file mode 100644
index 0000000..f7e0406
--- /dev/null
+++ b/scripts/rqtl_wrapper.R
@@ -0,0 +1,229 @@
+library(optparse)
+library(qtl)
+library(stringi)
+library(stringr)
+
+tmp_dir = Sys.getenv("TMPDIR")
+
+option_list = list(
+ make_option(c("-g", "--geno"), type="character", help=".geno file containing a dataset's genotypes"),
+ make_option(c("-p", "--pheno"), type="character", help="File containing two columns - sample names and values"),
+ make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"),
+ make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"),
+ make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"),
+ make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"),
+ make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"),
+ make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"),
+ make_option(c("-s", "--scale"), type="character", default="mb", help="Mapping scale - Megabases (Mb) or Centimorgans (cM)"),
+ make_option(c("--control"), type="character", default=NULL, help="Name of marker (contained in genotype file) to be used as a control"),
+ make_option(c("-o", "--outdir"), type="character", default=file.path(tmp_dir, "output"), help="Directory in which to write result file"),
+ make_option(c("-f", "--filename"), type="character", default=NULL, help="Name to use for result file"),
+ make_option(c("-v", "--verbose"), action="store_true", default=NULL, help="Show extra information")
+);
+
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+
+verbose_print <- function(...){
+ if (!is.null(opt$verbose)) {
+ for(item in list(...)){
+ cat(item)
+ }
+ cat("\n")
+ }
+}
+
+if (is.null(opt$geno) || is.null(opt$pheno)){
+ print_help(opt_parser)
+ stop("Both a genotype and phenotype file must be provided.", call.=FALSE)
+}
+
+geno_file = opt$geno
+pheno_file = opt$pheno
+
+# Generate randomized filename for cross object
+cross_file = file.path(tmp_dir, "cross", paste(stri_rand_strings(1, 8), ".cross", sep = ""))
+
+trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
+
+get_geno_code <- function(header, name = 'unk'){
+ mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
+ return(trim(strsplit(header[mat],':')[[1]][2]))
+}
+
+geno_to_csvr <- function(genotypes, trait_names, trait_vals, out, sex = NULL,
+ mapping_scale = "Mb", verbose = FALSE){
+ # Assume a geno header is not longer than 40 lines
+ header = readLines(genotypes, 40)
+
+ # Major hack to skip the geno headers
+ toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1) - 1
+
+ type <- get_geno_code(header, 'type')
+
+ # Get the genotype codes
+ if(type == '4-way'){
+ genocodes <- NULL
+ } else {
+ genocodes <- c(get_geno_code(header, 'mat'), get_geno_code(header, 'het'),
+ get_geno_code(header, 'pat'))
+ }
+ genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE,
+ na.strings=get_geno_code(header,'unk'),
+ colClasses='character', comment.char = '#')
+
+ verbose_print('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
+
+ # If there isn't a sex phenotype, treat all as males
+ if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))
+
+ cross_items = list()
+
+ # Add trait and covar phenotypes
+ for (i in 1:length(trait_names)){
+ cross_items[[i]] <- c(trait_names[i], '', '', unlist(trait_vals[[i]]))
+ }
+
+ # Sex phenotype for the mice
+ cross_items[[length(trait_names) + 1]] <- c('sex', '', '', sex)
+ # Genotypes
+ cross_items[[length(trait_names) + 2]] <- cbind(genodata[,c('Locus','Chr', mapping_scale)],
+ genodata[, 5:ncol(genodata)])
+
+ out_csvr <- do.call(rbind, cross_items)
+
+ # Save it to a file
+ write.table(out_csvr, file=out, row.names=FALSE, col.names=FALSE, quote=FALSE, sep=',')
+
+ # Load the created cross file using R/qtl read.cross
+ if (type == '4-way') {
+ verbose_print('Loading in as 4-WAY\n')
+ cross = read.cross(file=out, 'csvr', genotypes=NULL, crosstype="4way")
+ } else if(type == 'f2') {
+ verbose_print('Loading in as F2\n')
+ cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="f2")
+ } else {
+ verbose_print('Loading in as normal\n')
+ cross = read.cross(file=out, 'csvr', genotypes=genocodes)
+ }
+ if (type == 'riset') {
+ # If its a RIL, convert to a RIL in R/qtl
+ verbose_print('Converting to RISELF\n')
+ cross <- convert2riself(cross)
+ }
+
+ return(cross)
+}
+
+create_marker_covars <- function(the_cross, control_marker){
+ #' Given a string of one or more marker names (comma separated), fetch
+ #' the markers' values from the genotypes and return them as vectors/a vector
+ #' of values
+
+ # In case spaces are added in the string of marker names
+ covariate_names <- strsplit(str_replace(control_marker, " ", ""), ",")
+
+ genotypes <- pull.geno(the_cross)
+ covariates_in_geno <- which(covariate_names %in% colnames(genotypes))
+ covariate_names <- covariate_names[covariates_in_geno]
+ marker_covars <- genotypes[, unlist(covariate_names)]
+
+ return(marker_covars)
+}
+
+# Get phenotype vector from input file
+df <- read.table(pheno_file, na.strings = "x", header=TRUE, check.names=FALSE)
+sample_names <- df$Sample
+trait_names <- colnames(df)[2:length(colnames(df))]
+
+# Since there will always only be one non-covar phenotype, its name will be in the first column
+pheno_name = unlist(trait_names)[1]
+
+trait_vals <- vector(mode = "list", length = length(trait_names))
+for (i in 1:length(trait_names)) {
+ this_trait <- trait_names[i]
+ this_vals <- df[this_trait]
+ trait_vals[[i]] <- this_vals
+
+ trait_names[i] = paste("T_", this_trait, sep = "")
+}
+
+verbose_print('Generating cross object\n')
+cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file)
+
+# Calculate genotype probabilities
+if (!is.null(opt$interval)) {
+ verbose_print('Calculating genotype probabilities with interval mapping\n')
+ cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
+} else {
+ verbose_print('Calculating genotype probabilities\n')
+ cross_object <- calc.genoprob(cross_object)
+}
+
+# Pull covariates out of cross object, if they exist
+covars = vector(mode = "list", length = length(trait_names) - 1)
+if (!is.null(opt$addcovar)) {
+ #If perm strata are being used, it'll be included as the final column in the phenotype file
+ if (!is.null(opt$pstrata)) {
+ covar_names = trait_names[3:length(trait_names) - 1]
+ } else {
+ covar_names = trait_names[3:length(trait_names)]
+ }
+ covars <- pull.pheno(cross_object, covar_names)
+}
+
+# Pull permutation strata out of cross object, if it is being used
+perm_strata = vector()
+if (!is.null(opt$pstrata)) {
+ strata_col = trait_names[length(trait_names)]
+ perm_strata <- pull.pheno(cross_object, strata_col)
+}
+
+# If a marker name is supplied as covariate, get its vector of values and add them as a covariate
+if (!is.null(opt$control)) {
+ marker_covars = create_marker_covars(cross_object, opt$control)
+ covars <- cbind(covars, marker_covars)
+}
+
+# Calculate permutations
+if (opt$nperm > 0) {
+ if (!is.null(opt$filename)){
+ perm_out_file = file.path(opt$outdir, paste("PERM_", opt$filename, sep = "" ))
+ } else {
+ perm_out_file = file.path(opt$outdir, paste(pheno_name, "_PERM_", stri_rand_strings(1, 8), sep = ""))
+ }
+
+ if (!is.null(opt$addcovar) || !is.null(opt$control)){
+ if (!is.null(opt$pstrata)) {
+ verbose_print('Running permutations with cofactors and strata\n')
+ perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+ } else {
+ verbose_print('Running permutations with cofactors\n')
+ perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
+ }
+ } else {
+ if (!is.null(opt$pstrata)) {
+ verbose_print('Running permutations with strata\n')
+ perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+ } else {
+ verbose_print('Running permutations\n')
+ perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
+ }
+ }
+ write.csv(perm_results, perm_out_file)
+}
+
+if (!is.null(opt$filename)){
+ out_file = file.path(opt$outdir, opt$filename)
+} else {
+ out_file = file.path(opt$outdir, paste(pheno_name, "_", stri_rand_strings(1, 8), sep = ""))
+}
+
+if (!is.null(opt$addcovar) || !is.null(opt$control)){
+ verbose_print('Running scanone with cofactors\n')
+ qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
+} else {
+ verbose_print('Running scanone\n')
+ qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method)
+}
+write.csv(qtl_results, out_file)
diff --git a/tests/unit/computations/test_rqtl.py b/tests/unit/computations/test_rqtl.py
new file mode 100644
index 0000000..955d0ab
--- /dev/null
+++ b/tests/unit/computations/test_rqtl.py
@@ -0,0 +1,42 @@
+"""Test cases for procedures defined in computations.rqtl"""
+import unittest
+
+from unittest import mock
+from gn3.computations.rqtl import generate_rqtl_cmd
+
+class TestRqtl(unittest.TestCase):
+ """Test cases for computations.rqtl module"""
+ @mock.patch("gn3.computations.rqtl.generate_hash_of_string")
+ @mock.patch("gn3.computations.rqtl.get_hash_of_files")
+ def test_generate_rqtl_command(self, mock_get_hash_files, mock_generate_hash_string):
+ """Test computing mapping results with R/qtl"""
+ mock_get_hash_files.return_value = "my-hash1"
+ mock_generate_hash_string.return_value = "my-hash2"
+
+ self.assertEqual(
+ generate_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper",
+ rqtl_wrapper_kwargs={
+ "g": "genofile",
+ "p": "phenofile",
+ "model": "normal",
+ "method": "hk",
+ "nperm": 1000,
+ "scale": "Mb",
+ "control": "rs123456"
+ },
+ rqtl_wrapper_bool_kwargs=[
+ "addcovar",
+ "interval"
+ ]), {
+ "output_file":
+ "my-hash1my-hash2my-hash2-output.csv",
+ "rqtl_cmd": (
+ "Rscript rqtl-wrapper "
+ "--g genofile --p phenofile "
+ "--model normal --method hk "
+ "--nperm 1000 --scale Mb "
+ "--control rs123456 "
+ "--filename my-hash1my-hash2my-hash2-output.csv "
+ "--addcovar --interval"
+ )
+ })
diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py
index aafb3a2..f36ba55 100644
--- a/tests/unit/test_commands.py
+++ b/tests/unit/test_commands.py
@@ -6,6 +6,7 @@ from datetime import datetime
from typing import Callable
from unittest import mock
from gn3.commands import compose_gemma_cmd
+from gn3.commands import compose_rqtl_cmd
from gn3.commands import queue_cmd
from gn3.commands import run_cmd
from gn3.exceptions import RedisConnectionError
@@ -53,6 +54,30 @@ class TestCommands(unittest.TestCase):
"-p /tmp/gf13Ad0tRX/phenofile.txt"
" -gk"))
+ def test_compose_rqtl_cmd(self):
+ """Test that the R/qtl cmd is composed correctly"""
+ self.assertEqual(
+ compose_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper",
+ rqtl_wrapper_kwargs={
+ "g": "the_genofile",
+ "p": "the_phenofile",
+ "model": "np",
+ "method": "ehk",
+ "nperm": 2000,
+ "scale": "Mb",
+ "control": "rs234567"
+ },
+ rqtl_wrapper_bool_kwargs=[
+ "addcovar"
+ ]),
+ ("Rscript rqtl-wrapper "
+ "--g the_genofile --p the_phenofile "
+ "--model np --method ehk "
+ "--nperm 2000 --scale Mb "
+ "--control rs234567 "
+ "--addcovar")
+ )
+
def test_queue_cmd_exception_raised_when_redis_is_down(self):
"""Test that the correct error is raised when Redis is unavailable"""
self.assertRaises(RedisConnectionError,