aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2022-03-22 15:41:45 -0500
committerGitHub2022-03-22 15:41:45 -0500
commitf4b02281cdf0a29d82caf8c06ce96f947e5cf623 (patch)
treea81c350a5a08b3b7cdf8375b728b48951791a14c
parent7b2901817a1aabd947483f87b5a2a2d33618de7e (diff)
parenta75634cc5637168165e601f09dc9ad820e6e443f (diff)
downloadgenenetwork3-f4b02281cdf0a29d82caf8c06ce96f947e5cf623.tar.gz
Merge pull request #29 from zsloan/feature/add_rqtl_pairscan
Feature/add rqtl pairscan
-rw-r--r--gn3/api/rqtl.py11
-rw-r--r--gn3/computations/rqtl.py188
-rw-r--r--scripts/rqtl_wrapper.R137
3 files changed, 267 insertions, 69 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index b5627c5..70ebe12 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -6,7 +6,8 @@ from flask import current_app
from flask import jsonify
from flask import request
-from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \
+ process_rqtl_pairscan, process_perm_output
from gn3.computations.gemma import do_paths_exist
rqtl = Blueprint("rqtl", __name__)
@@ -25,7 +26,7 @@ run the rqtl_wrapper script and return the results as JSON
# Split kwargs by those with values and boolean ones that just convert to True/False
kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"]
- boolean_kwargs = ["addcovar", "interval", "pstrata"]
+ boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"]
all_kwargs = kwargs + boolean_kwargs
rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
@@ -48,9 +49,11 @@ run the rqtl_wrapper script and return the results as JSON
"output", rqtl_cmd.get('output_file'))):
os.system(rqtl_cmd.get('rqtl_cmd'))
- rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+ if "pairscan" in rqtl_bool_kwargs:
+ rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile)
+ else:
+ rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file'))
- rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
if int(rqtl_kwargs['nperm']) > 0:
rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
process_perm_output(rqtl_cmd.get('output_file'))
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index b3539a9..65ee6de 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,6 +1,7 @@
"""Procedures related rqtl computations"""
import os
-from typing import Dict, List, Union
+from bisect import bisect
+from typing import Dict, List, Tuple, Union
import numpy as np
@@ -15,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
rqtl_wrapper_bool_kwargs: list) -> Dict:
"""Given the base rqtl_wrapper command and
dict of keyword arguments, return the full rqtl_wrapper command and an
-output filename generated from a hash of the genotype and phenotype files
-
- """
+output filename generated from a hash of the genotype and phenotype files"""
# Generate a hash from contents of the genotype and phenotype files
_hash = get_hash_of_files(
@@ -47,11 +46,9 @@ output filename generated from a hash of the genotype and phenotype files
}
-def process_rqtl_output(file_name: str) -> List:
+def process_rqtl_mapping(file_name: str) -> List:
"""Given an output file name, read in R/qtl results and return
- a List of marker objects
-
- """
+ a List of marker objects"""
marker_obs = []
# Later I should probably redo this using csv.read to avoid the
# awkwardness with removing quotes with [1:-1]
@@ -80,12 +77,181 @@ def process_rqtl_output(file_name: str) -> List:
return marker_obs
+def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
+ """Given an output file name, read in R/qtl pair-scan results and return
+a list of both the JSON needed for the d3panels figure and a list of results
+to be used when generating the results table (which will include marker names)"""
+ figure_data = pairscan_for_figure(file_name)
+ table_data = pairscan_for_table(file_name, geno_file)
-def process_perm_output(file_name: str):
- """Given base filename, read in R/qtl permutation output and calculate
- suggestive and significant thresholds
+ return [figure_data, table_data]
+
+def pairscan_for_figure(file_name: str) -> Dict:
+ """Given an output file name, read in R/qtl pair-scan results and return
+ the JSON needed for the d3panels figure"""
+ figure_data = {}
+
+ # Open the file with the actual results, written as a list of lists
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", file_name), "r") as the_file:
+ lod_results = []
+ for i, line in enumerate(the_file):
+ if i == 0: # Skip first line
+ continue
+ line_items = [item.rstrip('\n') for item in line.split(",")]
+ lod_results.append(line_items[1:]) # Append all but first item in line
+ figure_data['lod'] = lod_results
+
+ # Open the map file with the list of markers/pseudomarkers and their positions
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", "MAP_" + file_name), "r") as the_file:
+ chr_list = [] # type: List
+ pos_list = [] # type: List
+ for i, line in enumerate(the_file):
+ if i == 0: # Skip first line
+ continue
+ line_items = [item.rstrip('\n') for item in line.split(",")]
+ chr_list.append(line_items[1][1:-1])
+ pos_list.append(line_items[2])
+ figure_data['chr'] = chr_list
+ figure_data['pos'] = pos_list
+
+ return figure_data
+
+def get_marker_list(map_file: str) -> List:
+ """
+ Open the map file with the list of markers/pseudomarkers and create list of marker obs
+
+ PARAMETERS:
+ map_file: The map file output by R/qtl containing marker/pseudomarker positions
+ """
+
+ marker_list = []
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", map_file), "r") as map_fh:
+ for line in map_fh.readlines()[1:]:
+ line_items = [item.rstrip('\n') for item in line.split(",")]
+ this_marker = {
+ 'name': line_items[0],
+ 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string
+ 'pos': line_items[2]
+ }
+
+ marker_list.append(this_marker)
+
+ return marker_list
+
+def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List:
+ """
+ Open the file with the actual R/qtl pair-scan results and write them as
+ they will be displayed in the results table
+ PARAMETERS:
+ results_file: The filename containing R/qtl pair-scan results
+ marker_list: List of marker/pseudomarker names/positions from results
+ original_markers: Dict of markers from the .geno file, for finding proximal/distal
+ markers to each pseudomarker
"""
+
+ table_data = []
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", results_file), "r") as the_file:
+ for i, line in enumerate(the_file.readlines()[1:]):
+ marker_1 = marker_list[i]
+ marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'],
+ marker_1['pos'],
+ original_markers)
+ line_items = [item.rstrip('\n') for item in line.split(",")]
+ for j, item in enumerate(line_items[1:]):
+ marker_2 = marker_list[j]
+ marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'],
+ marker_2['pos'],
+ original_markers)
+ try:
+ lod_score = f"{float(item):.3f}"
+ except ValueError:
+ lod_score = f"{item}"
+
+ table_data.append({
+ 'proximal1': marker_1['proximal'],
+ 'distal1': marker_1['distal'],
+ 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
+ 'lod': lod_score,
+ 'proximal2': marker_2['proximal'],
+ 'distal2': marker_2['distal'],
+ 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
+ })
+
+ return table_data
+
+def pairscan_for_table(file_name: str, geno_file: str) -> List:
+ """
+ Read in R/qtl pair-scan results and return as List representing
+ table row contents
+
+ PARAMETERS:
+ file_name: The filename containing R/qtl pair-scan results
+ geno_file: Filename of the genotype file (used to get marker positions)
+ """
+
+ # Open the map file with the list of markers/pseudomarkers and create list of marker obs
+ marker_list = get_marker_list("MAP_" + file_name)
+
+ # Get the list of original markers from the .geno file
+ original_markers = build_marker_pos_dict(geno_file)
+
+ # Open the file with the actual results and write the results as
+ # they will be displayed in the results table
+ table_data = generate_table_rows(file_name, marker_list, original_markers)
+
+ return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500]
+
+def build_marker_pos_dict(genotype_file: str) -> Dict:
+ """Gets list of markers and their positions from .geno file
+
+ Basically a pared-down version of parse_genotype_file for R/qtl pair-scan"""
+
+ with open(genotype_file, "r") as infile:
+ contents = infile.readlines()
+
+ # Get all lines after the metadata
+ lines = tuple(line for line in contents if
+ ((not line.strip().startswith("#")) and
+ (not line.strip().startswith("@")) and
+ (not line.strip() == "")))
+
+ header_items = lines[0].split("\t")
+ mb_exists = "Mb" in header_items
+ pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+
+ the_markers = {"1": {}} # type: Dict[str, Dict]
+ for line in lines[1:]: # The lines with markers
+ line_items = line.split("\t")
+ this_chr = line_items[0]
+ if this_chr not in the_markers:
+ the_markers[this_chr] = {}
+ the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+
+ return the_markers
+
+def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
+ """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+ return the nearest real marker"""
+
+ pos_list = [float(pos) for pos in marker_list[the_chr]]
+
+ # Get the position of the pseudomarker in the list of markers for the chr
+ the_pos_index = bisect(pos_list, float(the_pos))
+
+ proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+ distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
+
+ return proximal_marker, distal_marker
+
+def process_perm_output(file_name: str) -> Tuple[List, float, float]:
+ """Given base filename, read in R/qtl permutation output and calculate
+ suggestive and significant thresholds"""
+
perm_results = []
with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
"output", "PERM_" + file_name), "r", encoding="utf-8") as the_file:
diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index 13c2684..ea2c345 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -12,6 +12,7 @@ option_list = list(
make_option(c("--covarstruct"), type="character", help="File detailing which covariates are categorical or numerical"),
make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"),
make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"),
+ make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"),
make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"),
make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"),
make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"),
@@ -171,9 +172,15 @@ verbose_print('Generating cross object\n')
cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type)
# Calculate genotype probabilities
-if (!is.null(opt$interval)) {
+if (!is.null(opt$pairscan)) {
+ verbose_print('Calculating genotype probabilities for pair-scan\n')
+ cross_object <- calc.genoprob(cross_object, step=10)
+} else if (!is.null(opt$interval)) {
verbose_print('Calculating genotype probabilities with interval mapping\n')
cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
+} else if (!is.null(opt$pairscan)) {
+ verbose_print('Calculating genotype probabilities for pair-scan\n')
+ cross_object <- calc.genoprob(cross_object, step=10)
} else {
verbose_print('Calculating genotype probabilities\n')
cross_object <- calc.genoprob(cross_object)
@@ -188,6 +195,7 @@ if (type == "4-way") {
# Pull covariates out of cross object, if they exist
covars <- c() # Holds the covariates which should be passed to R/qtl
if (!is.null(opt$addcovar)) {
+ verbose_print('Pulling covariates out of cross object\n')
# If perm strata are being used, it'll be included as the final column in the phenotype file
if (!is.null(opt$pstrata)) {
covar_names = trait_names[2:(length(trait_names)-1)]
@@ -223,16 +231,30 @@ if (!is.null(opt$addcovar)) {
# Pull permutation strata out of cross object, if it is being used
perm_strata = vector()
if (!is.null(opt$pstrata)) {
+ verbose_print('Pulling permutation strata out of cross object\n')
strata_col = trait_names[length(trait_names)]
perm_strata <- pull.pheno(cross_object, strata_col)
}
# If a marker name is supplied as covariate, get its vector of values and add them as a covariate
if (!is.null(opt$control)) {
+ verbose_print('Creating marker covariates and binding them to covariates vector\n')
marker_covars = create_marker_covars(cross_object, opt$control)
covars <- cbind(covars, marker_covars)
}
+if (!is.null(opt$pairscan)) {
+ verbose_print("Running scantwo")
+ scan_func <- function(...){
+ scantwo(...)
+ }
+} else {
+ verbose_print("Running scanone")
+ scan_func <- function(...){
+ scanone(...)
+ }
+}
+
# Calculate permutations
if (opt$nperm > 0) {
if (!is.null(opt$filename)){
@@ -243,19 +265,19 @@ if (opt$nperm > 0) {
if (!is.null(opt$addcovar) || !is.null(opt$control)){
if (!is.null(opt$pstrata)) {
- verbose_print('Running ', opt$nperm, ' permutations with cofactors and strata\n')
- perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+ verbose_print('Running permutations with cofactors and strata\n')
+ perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
} else {
- verbose_print('Running ', opt$nperm, ' permutations with cofactors\n')
- perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
+ verbose_print('Running permutations with cofactors\n')
+ perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
}
} else {
if (!is.null(opt$pstrata)) {
- verbose_print('Running ', opt$nperm, ' permutations with strata\n')
- perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+ verbose_print('Running permutations with strata\n')
+ perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
} else {
- verbose_print('Running ', opt$nperm, ' permutations\n')
- perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
+ verbose_print('Running permutations\n')
+ perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
}
}
write.csv(perm_results, perm_out_file)
@@ -268,57 +290,64 @@ if (!is.null(opt$filename)){
}
if (!is.null(opt$addcovar) || !is.null(opt$control)){
- verbose_print('Running scanone with cofactors\n')
- qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
+ verbose_print('Running scan with cofactors\n')
+ qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
} else {
- verbose_print('Running scanone\n')
- qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method)
+ verbose_print('Running scan\n')
+ qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method)
}
-#QTL main effects on adjusted longevity
-getEffects <- function(sdata, gtsprob, marker = "1_24042124", model = "longevity ~ sex + site + cohort + treatment", trait = "longevity"){
- rownames(sdata) <- 1:nrow(sdata)
- rownames(gtsprob) <- 1:nrow(gtsprob)
- mp <- gtsprob[, grep(marker, colnames(gtsprob))]
- gts <- unlist(lapply(lapply(lapply(apply(mp,1,function(x){which(x > 0.85)}),names), strsplit, ":"), function(x){
- if(length(x) > 0){ return(x[[1]][2]); }else{ return(NA) }
- }))
-
- ismissing <- which(apply(sdata, 1, function(x){any(is.na(x))}))
- if(length(ismissing) > 0){
- sdata <- sdata[-ismissing, ]
- gts <- gts[-ismissing]
+verbose_print('Writing results to CSV file\n')
+if (!is.null(opt$pairscan)) {
+ map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" ))
+ write.csv(qtl_results[1], out_file)
+ write.csv(qtl_results[2], map_out_file)
+} else {
+ # QTL main effects on adjusted longevity
+ getEffects <- function(sdata, gtsprob, marker = "1_24042124", model = "longevity ~ sex + site + cohort + treatment", trait = "longevity"){
+ rownames(sdata) <- 1:nrow(sdata)
+ rownames(gtsprob) <- 1:nrow(gtsprob)
+ mp <- gtsprob[, grep(marker, colnames(gtsprob))]
+ gts <- unlist(lapply(lapply(lapply(apply(mp,1,function(x){which(x > 0.85)}),names), strsplit, ":"), function(x){
+ if(length(x) > 0){ return(x[[1]][2]); }else{ return(NA) }
+ }))
+
+ ismissing <- which(apply(sdata, 1, function(x){any(is.na(x))}))
+ if(length(ismissing) > 0){
+ sdata <- sdata[-ismissing, ]
+ gts <- gts[-ismissing]
+ }
+
+ mlm <- lm(as.formula(model), data = sdata)
+ pheAdj <- rep(NA, nrow(sdata))
+ adj <- residuals(mlm) + mean(sdata[, trait])
+ pheAdj[as.numeric(names(adj))] <- adj
+ means <- c(mean(pheAdj[which(gts == "AC")],na.rm=TRUE),mean(pheAdj[which(gts == "AD")],na.rm=TRUE),mean(pheAdj[which(gts == "BC")],na.rm=TRUE),mean(pheAdj[which(gts == "BD")],na.rm=TRUE))
+ std <- function(x) sd(x,na.rm=TRUE)/sqrt(length(x))
+ stderrs <- c(std(pheAdj[which(gts == "AC")]),std(pheAdj[which(gts == "AD")]),std(pheAdj[which(gts == "BC")]),std(pheAdj[which(gts == "BD")]))
+ paste0(round(means,0), " ± ", round(stderrs,2))
}
- mlm <- lm(as.formula(model), data = sdata)
- pheAdj <- rep(NA, nrow(sdata))
- adj <- residuals(mlm) + mean(sdata[, trait])
- pheAdj[as.numeric(names(adj))] <- adj
- means <- c(mean(pheAdj[which(gts == "AC")],na.rm=TRUE),mean(pheAdj[which(gts == "AD")],na.rm=TRUE),mean(pheAdj[which(gts == "BC")],na.rm=TRUE),mean(pheAdj[which(gts == "BD")],na.rm=TRUE))
- std <- function(x) sd(x,na.rm=TRUE)/sqrt(length(x))
- stderrs <- c(std(pheAdj[which(gts == "AC")]),std(pheAdj[which(gts == "AD")]),std(pheAdj[which(gts == "BC")]),std(pheAdj[which(gts == "BD")]))
- paste0(round(means,0), " ± ", round(stderrs,2))
-}
+ if (type == "4-way") {
+ verbose_print("Get phenotype name + genoprob + all phenotypes + models for 4-way crosses")
+ traitname <- colnames(pull.pheno(cross_object))[1]
+ gtsp <- pull.genoprob(cross_object)
+ allpheno <- pull.pheno(cross_object)
+ if (!is.null(opt$addcovar)) {
+ model <- paste0(traitname, " ~ ", paste0(covar_names, sep="", collapse=" + "))
+ } else {
+ model <- paste0(traitname, " ~ 1 ")
+ }
-if (type == "4-way") {
- verbose_print("Get phenotype name + genoprob + all phenotypes + models for 4-way crosses")
- traitname <- colnames(pull.pheno(cross_object))[1]
- gtsp <- pull.genoprob(cross_object)
- allpheno <- pull.pheno(cross_object)
- if (!is.null(opt$addcovar)) {
- model <- paste0(traitname, " ~ ", paste0(covar_names, sep="", collapse=" + "))
- } else {
- model <- paste0(traitname, " ~ 1 ")
+ meffects <- c()
+ verbose_print("Getting QTL main effects for 4-way crosses")
+ for(marker in rownames(qtl_results)){
+ meff <- getEffects(allpheno, gtsp, marker = marker, model, trait = traitname)
+ meffects <- rbind(meffects, meff)
+ }
+ qtl_results <- cbind(data.frame(qtl_results[,1:3]), meffects)
+ colnames(qtl_results)[4:7] <- c("AC", "AD", "BC", "BD")
}
- meffects <- c()
- verbose_print("Getting QTL main effects for 4-way crosses")
- for(marker in rownames(qtl_results)){
- meff <- getEffects(allpheno, gtsp, marker = marker, model, trait = traitname)
- meffects <- rbind(meffects, meff)
- }
- qtl_results <- cbind(data.frame(qtl_results[,1:3]), meffects)
- colnames(qtl_results)[4:7] <- c("AC", "AD", "BC", "BD")
+ write.csv(qtl_results, out_file)
}
-
-write.csv(qtl_results, out_file)