Merge branch 'master' of https://github.com/zsloan/genenetwork2

author: Pjotr Prins 2015-03-18 19:41:13 +0300
committer: Pjotr Prins 2015-03-18 19:41:13 +0300
commit: 394c53b80a710e4413e4f131ba568b6980a401ac (patch)
tree: 373acf48af8030be4b3a3e5ec89aa2ca14f481d1 /wqflask
parent: 6da3abd96e0eeb29c5950bd27f7d852b3987524d (diff)
parent: 90cd6a9df350c197185c638729299959ac54d3b0 (diff)
download: genenetwork2-394c53b80a710e4413e4f131ba568b6980a401ac.tar.gz
2 files changed, 62 insertions, 38 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 140da0c5..60bc721e 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -228,33 +228,64 @@ class MarkerRegression(object):
         os.system(rqtl_command)
         
         count, p_values = self.parse_rqtl_output(plink_output_filename)
+
+    def geno_to_rqtl_function(self):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+        print("Adding a function to the R environment")
+        ro.r("""
+           getGenoCode <- function(header, name = 'unk'){
+             mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
+             return(strsplit(header[mat],'')[[1]][6])
+           }
+
+           GENOtoCSVR <- function(genotypes = 'BXD.geno', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+             header = readLines(genotypes, 40)
+             toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
+             genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))                # Get the genotype codes 
+
+             genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
+             cat('Genodata:', toskip, " ", dim(genodata), '\n')
+             if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4))                                                     # If there isn't a phenotype, generate a random one
+             if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
+             outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
+                              c('sex', '', '', sex),                                                                           # Sex phenotype for the mice
+                              cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)]))                          # Genotypes
+             write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
+             require(qtl)
+             return(read.cross(file=out, 'csvr', genotypes=genocodes))                                                  # Load it using R/qtl read.cross  
+          }
+        """)
     
     def run_rqtl_geno(self):
-
         print("Calling R/qtl from python")
 
+        self.geno_to_rqtl_function()
+
         ## Get pointers to some common R functions
         r_library     = ro.r["library"]             # Map the library function
         r_c           = ro.r["c"]                   # Map the c function
-        
+
         print(r_library("qtl"))                     # Load R/qtl
-  
+
         ## Get pointers to some R/qtl functions
         scanone         = ro.r["scanone"]               # Map the scanone function
         calc_genoprob   = ro.r["calc.genoprob"]         # Map the calc.genoprob function
         read_cross      = ro.r["read.cross"]            # Map the read.cross function
         write_cross     = ro.r["write.cross"]           # Map the write.cross function
+        GENOtoCSVR      = ro.r["GENOtoCSVR"]            # Map the GENOtoCSVR function
+
+        genofilelocation  = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".geno"
+        crossfilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".cross"
 
+        print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
 
-        cross_object = read_cross(file = "BXD.csvr", format = "csvr", dir="/home/zas1024/PLINK2RQTL/test", genotypes = r_c("B","H","D"))
+        cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)  # TODO: Add the SEX if that is available
 
         if self.manhattan_plot:
             cross_object = calc_genoprob(cross_object)
         else:
             cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
-       
-        # Add the phenotype
-        cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())
+
+        cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())             # Add the phenotype
 
         # for debug: write_cross(cross_object, "csvr", "test.csvr")
 
@@ -265,42 +296,36 @@ class MarkerRegression(object):
         else:
             result_data_frame = scanone(cross_object, pheno = "the_pheno")
 
-        if int(self.num_perm) > 0:
-	     # Do permutation
+        if int(self.num_perm) > 0:                                                                  # Do permutation (if requested by user)
             if(self.control.replace(" ", "") != ""):
-                covar = self.create_covariatesShort(cross_object)
-                perm_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, n_perm=int(self.num_perm))
+                covar = self.create_covariates(cross_object)
+                perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm=int(self.num_perm))
             else:
-                perm_data_frame = scanone(cross_object, pheno = "the_pheno", n_perm=int(self.num_perm))
-
-        self.suggestive, self.significant = self.process_rqtl_perm_results(perm_data_frame)
+                perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm=int(self.num_perm))
 
-        qtl_results = self.process_rqtl_results(result_data_frame)
+        self.process_rqtl_perm_results(perm_data_frame)                                             # Functions that sets the thresholds for the webinterface
 
-        return qtl_results
+        return self.process_rqtl_results(result_data_frame)
 
     def add_phenotype(self, cross, pheno_as_string):
         ro.globalenv["the_cross"] = cross
         ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
         return ro.r["the_cross"]
 
-
     def create_covariates(self, cross):
         ro.globalenv["the_cross"] = cross
-        ro.r('genotypes <- pull.geno(the_cross)')       # Get genotype matrix
-        userinputS = self.control.replace(" ", "").split(",")     # TODO sanitize user input !!! never trust a user
-        covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) 
-        print(covariate_names)
-        ro.r('covariates <- genotypes[,c(' + covariate_names + ')]')         # get covariate matrix, 
-        print("COVARIATES:", ro.r["covariates"])
+        ro.r('genotypes <- pull.geno(the_cross)')                             # Get the genotype matrix
+        userinputS = self.control.replace(" ", "").split(",")                 # TODO sanitize user input, Never Ever trust a user
+        covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
+        print("Marker names of selected covariates:", covariate_names)
+        ro.r('covariates <- genotypes[,c(' + covariate_names + ')]')          # Get the covariate matrix by using the marker name as index to the genotype file
+        print("R/qtl matrix of covariates:", ro.r["covariates"])
         return ro.r["covariates"]
 
     def sanitize_rqtl_phenotype(self):
         pheno_as_string = "c("
-        null_pos = []
         for i, val in enumerate(self.vals):
             if val == "x":
-                null_pos.append(i)
                 if i < (len(self.vals) - 1):
                     pheno_as_string +=  "NA,"
                 else:
@@ -313,35 +338,33 @@ class MarkerRegression(object):
         pheno_as_string += ")"
         return pheno_as_string
 
-    def process_rqtl_results(self, result):
-        #TODO how to make this a one liner and not copy the stuff in a loop
+    def process_rqtl_results(self, result):        # TODO: how to make this a one liner and not copy the stuff in a loop
         qtl_results = []
-        
+
         output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
-        print("output", output)
-        
+        print("R/qtl scanone output:", output)
+
         for i, line in enumerate(result.iter_row()):
             marker = {}
             marker['name'] = result.rownames[i]
             marker['chr'] = output[i][0]
             marker['Mb'] = output[i][1]
             marker['lod_score'] = output[i][2]
-            
             qtl_results.append(marker)
-            
+
         return qtl_results
-    
+
     def process_rqtl_perm_results(self, results):
         perm_vals = []
         for line in str(results).split("\n")[1:(int(self.num_perm)+1)]:
-            print("line:", line.split())
+            print("R/qtl permutation line:", line.split())
             perm_vals.append(float(line.split()[1]))
-        
+
         self.suggestive = np.percentile(np.array(perm_vals), 67)
         self.significant = np.percentile(np.array(perm_vals), 95)
-        
+
         return self.suggestive, self.significant
-            
+
 
     def run_plink(self):
     
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 99a5a940..a9744e72 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -733,12 +733,13 @@ def gn2_redis(key,species):
     
     tempdata = temp_data.TempData(params['temp_uuid'])
 
-    print('kinship', np.array(params['kinship_matrix']))
+    
     print('pheno', np.array(params['pheno_vector']))
 
     # sys.exit(1)
     
     if species == "human" :
+        print('kinship', np.array(params['kinship_matrix']))
         ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
                   covariate_matrix = np.array(params['covariate_matrix']),
                   plink_input_file = params['input_file_name'],
author	Pjotr Prins	2015-03-18 19:41:13 +0300
committer	Pjotr Prins	2015-03-18 19:41:13 +0300
commit	394c53b80a710e4413e4f131ba568b6980a401ac (patch)
tree	373acf48af8030be4b3a3e5ec89aa2ca14f481d1 /wqflask
parent	6da3abd96e0eeb29c5950bd27f7d852b3987524d (diff)
parent	90cd6a9df350c197185c638729299959ac54d3b0 (diff)
download	genenetwork2-394c53b80a710e4413e4f131ba568b6980a401ac.tar.gz