From cfc1378f767b4bad12af6ffb38f0ec9e1db12359 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Wed, 18 Mar 2015 19:08:14 +0000
Subject: Adding a fix to purge non-existing markers as covariates for the
 R/qtl analysis

---
 .../wqflask/marker_regression/marker_regression.py | 46 ++++++++++++++--------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 60bc721e..9a3ff073 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -76,7 +76,10 @@ class MarkerRegression(object):
         elif self.mapping_method == "rqtl_plink":
             qtl_results = self.run_rqtl_plink()
         elif self.mapping_method == "rqtl_geno":
-            self.num_perm = start_vars['num_perm']
+            if start_vars['num_perm'] == "":
+                self.num_perm = 0
+            else:
+                self.num_perm = start_vars['num_perm']
             self.control = start_vars['control_marker']
 
             print("DOING RQTL GENO")
@@ -232,18 +235,21 @@ class MarkerRegression(object):
     def geno_to_rqtl_function(self):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
         print("Adding a function to the R environment")
         ro.r("""
+           trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
+
            getGenoCode <- function(header, name = 'unk'){
              mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
-             return(strsplit(header[mat],'')[[1]][6])
+             return(trim(strsplit(header[mat],':')[[1]][2]))
            }
 
            GENOtoCSVR <- function(genotypes = 'BXD.geno', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
              header = readLines(genotypes, 40)
              toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
+             
              genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))                # Get the genotype codes 
-
+             type <- getGenoCode(header, 'type')
              genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
-             cat('Genodata:', toskip, " ", dim(genodata), '\n')
+             cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
              if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4))                                                     # If there isn't a phenotype, generate a random one
              if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
              outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
@@ -251,7 +257,9 @@ class MarkerRegression(object):
                               cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)]))                          # Genotypes
              write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
              require(qtl)
-             return(read.cross(file=out, 'csvr', genotypes=genocodes))                                                  # Load it using R/qtl read.cross  
+             cross = read.cross(file=out, 'csvr', genotypes=genocodes)
+             if(type == 'riset') cross <- convert2riself(cross)
+             return(cross)                                                  # Load it using R/qtl read.cross  
           }
         """)
     
@@ -263,6 +271,7 @@ class MarkerRegression(object):
         ## Get pointers to some common R functions
         r_library     = ro.r["library"]             # Map the library function
         r_c           = ro.r["c"]                   # Map the c function
+        r_sum           = ro.r["sum"]             # Map the ncol function
 
         print(r_library("qtl"))                     # Load R/qtl
 
@@ -285,25 +294,24 @@ class MarkerRegression(object):
         else:
             cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
 
-        cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())             # Add the phenotype
+        cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())                 # Add the phenotype
 
         # for debug: write_cross(cross_object, "csvr", "test.csvr")
 
         # Scan for QTLs
-        if(self.control.replace(" ", "") != ""):
-            covar = self.create_covariates(cross_object)
-            result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar)
+        covar = self.create_covariates(cross_object)
+        if(r_sum(covar)[0] > 0):
+            print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar)
         else:
-            result_data_frame = scanone(cross_object, pheno = "the_pheno")
+            print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno")
 
-        if int(self.num_perm) > 0:                                                                  # Do permutation (if requested by user)
-            if(self.control.replace(" ", "") != ""):
-                covar = self.create_covariates(cross_object)
-                perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm=int(self.num_perm))
+        if int(self.num_perm) > 0:                                                                      # Do permutation (if requested by user)
+            if(r_sum(covar)[0] > 0):
+                perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = int(self.num_perm))
             else:
-                perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm=int(self.num_perm))
+                perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = int(self.num_perm))
 
-        self.process_rqtl_perm_results(perm_data_frame)                                             # Functions that sets the thresholds for the webinterface
+            self.process_rqtl_perm_results(perm_data_frame)                                             # Functions that sets the thresholds for the webinterface
 
         return self.process_rqtl_results(result_data_frame)
 
@@ -318,7 +326,11 @@ class MarkerRegression(object):
         userinputS = self.control.replace(" ", "").split(",")                 # TODO sanitize user input, Never Ever trust a user
         covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
         print("Marker names of selected covariates:", covariate_names)
-        ro.r('covariates <- genotypes[,c(' + covariate_names + ')]')          # Get the covariate matrix by using the marker name as index to the genotype file
+        ro.r('covnames <- c(' + covariate_names + ')')
+        ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
+        ro.r('covnames <- covnames[covInGeno]')
+        ro.r("cat('covnames (purged): ', covnames,'\n')")
+        ro.r('covariates <- genotypes[,covnames]')          # Get the covariate matrix by using the marker name as index to the genotype file
         print("R/qtl matrix of covariates:", ro.r["covariates"])
         return ro.r["covariates"]
 
-- 
cgit 1.4.1