From cfc1378f767b4bad12af6ffb38f0ec9e1db12359 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 18 Mar 2015 19:08:14 +0000 Subject: Adding a fix to purge non-existing markers as covariates for the R/qtl analysis --- .../wqflask/marker_regression/marker_regression.py | 46 ++++++++++++++-------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 60bc721e..9a3ff073 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -76,7 +76,10 @@ class MarkerRegression(object): elif self.mapping_method == "rqtl_plink": qtl_results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": - self.num_perm = start_vars['num_perm'] + if start_vars['num_perm'] == "": + self.num_perm = 0 + else: + self.num_perm = start_vars['num_perm'] self.control = start_vars['control_marker'] print("DOING RQTL GENO") @@ -232,18 +235,21 @@ class MarkerRegression(object): def geno_to_rqtl_function(self): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly print("Adding a function to the R environment") ro.r(""" + trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } + getGenoCode <- function(header, name = 'unk'){ mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) - return(strsplit(header[mat],'')[[1]][6]) + return(trim(strsplit(header[mat],':')[[1]][2])) } GENOtoCSVR <- function(genotypes = 'BXD.geno', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ header = readLines(genotypes, 40) toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers + genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes - + type <- getGenoCode(header, 'type') genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') - cat('Genodata:', toskip, " ", dim(genodata), '\n') + cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype @@ -251,7 +257,9 @@ class MarkerRegression(object): cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file require(qtl) - return(read.cross(file=out, 'csvr', genotypes=genocodes)) # Load it using R/qtl read.cross + cross = read.cross(file=out, 'csvr', genotypes=genocodes) + if(type == 'riset') cross <- convert2riself(cross) + return(cross) # Load it using R/qtl read.cross } """) @@ -263,6 +271,7 @@ class MarkerRegression(object): ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_c = ro.r["c"] # Map the c function + r_sum = ro.r["sum"] # Map the ncol function print(r_library("qtl")) # Load R/qtl @@ -285,25 +294,24 @@ class MarkerRegression(object): else: cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") - cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype()) # Add the phenotype + cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype()) # Add the phenotype # for debug: write_cross(cross_object, "csvr", "test.csvr") # Scan for QTLs - if(self.control.replace(" ", "") != ""): - covar = self.create_covariates(cross_object) - result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar) + covar = self.create_covariates(cross_object) + if(r_sum(covar)[0] > 0): + print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar) else: - result_data_frame = scanone(cross_object, pheno = "the_pheno") + print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno") - if int(self.num_perm) > 0: # Do permutation (if requested by user) - if(self.control.replace(" ", "") != ""): - covar = self.create_covariates(cross_object) - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm=int(self.num_perm)) + if int(self.num_perm) > 0: # Do permutation (if requested by user) + if(r_sum(covar)[0] > 0): + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = int(self.num_perm)) else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm=int(self.num_perm)) + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = int(self.num_perm)) - self.process_rqtl_perm_results(perm_data_frame) # Functions that sets the thresholds for the webinterface + self.process_rqtl_perm_results(perm_data_frame) # Functions that sets the thresholds for the webinterface return self.process_rqtl_results(result_data_frame) @@ -318,7 +326,11 @@ class MarkerRegression(object): userinputS = self.control.replace(" ", "").split(",") # TODO sanitize user input, Never Ever trust a user covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) print("Marker names of selected covariates:", covariate_names) - ro.r('covariates <- genotypes[,c(' + covariate_names + ')]') # Get the covariate matrix by using the marker name as index to the genotype file + ro.r('covnames <- c(' + covariate_names + ')') + ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') + ro.r('covnames <- covnames[covInGeno]') + ro.r("cat('covnames (purged): ', covnames,'\n')") + ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file print("R/qtl matrix of covariates:", ro.r["covariates"]) return ro.r["covariates"] -- cgit v1.2.3