Improved rqtl code

author: Zachary Sloan 2015-03-17 18:08:44 +0000
committer: Zachary Sloan 2015-03-17 18:08:44 +0000
commit: 85ffbfeb6379120c25b0bf589d568bb1453f778b (patch)
tree: 7e915b6694ffc4facf4845772d3e4748303779f1
parent: b4ce1da2b8a376472ed87b80b97ef7ce1b3d9314 (diff)
download: genenetwork2-85ffbfeb6379120c25b0bf589d568bb1453f778b.tar.gz
1 files changed, 148 insertions, 36 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index fa680f5f..99f23676 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -13,7 +13,7 @@ import os
 import collections
 import uuid
 
-import rpy2.robjects as robjects
+import rpy2.robjects as ro
 import numpy as np
 from scipy import linalg
 
@@ -79,7 +79,7 @@ class MarkerRegression(object):
             self.num_perm = start_vars['num_perm']
             self.control = start_vars['control_marker']
 
-            print("doing rqtl_geno")
+            print("DOING RQTL GENO")
             qtl_results = self.run_rqtl_geno()
             print("qtl_results:", qtl_results)
         elif self.mapping_method == "plink":
@@ -88,8 +88,9 @@ class MarkerRegression(object):
         elif self.mapping_method == "pylmm":
             print("RUNNING PYLMM")
             self.num_perm = start_vars['num_perm']
-            if int(self.num_perm) > 0:
-	         self.run_permutations(str(temp_uuid))
+            if self.num_perm != "":
+                if int(self.num_perm) > 0:
+	             self.run_permutations(str(temp_uuid))
             qtl_results = self.gen_data(str(temp_uuid))
         else:
             print("RUNNING NOTHING")
@@ -229,6 +230,136 @@ class MarkerRegression(object):
         count, p_values = self.parse_rqtl_output(plink_output_filename)
     
     def run_rqtl_geno(self):
+
+        print("CORRECT FUNCTION BEING CALLED")
+
+        ## Get pointers to some common R functions
+        r_library     = ro.r["library"]             # Map the library function
+        r_c           = ro.r["c"]                   # Map the c function
+        
+        print(r_library("qtl"))                     # Load R/qtl
+  
+        ## Get pointers to some R/qtl functions
+        scanone         = ro.r["scanone"]               # Map the scanone function
+        calc_genoprob   = ro.r["calc.genoprob"]         # Map the calc.genoprob function
+        read_cross      = ro.r["read.cross"]            # Map the read.cross function
+        write_cross     = ro.r["write.cross"]           # Map the write.cross function
+
+
+        cross_object = read_cross(file = "BXD.csvr", format = "csvr", dir="/home/zas1024/PLINK2RQTL/test", genotypes = r_c("B","H","D"))
+
+        if self.manhattan_plot:
+            cross_object = calc_genoprob(cross_object)
+        else:
+            cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
+       
+        # Add the phenotype
+        cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())
+        # for debug: write_cross(cross_object, "csvr", "test.csvr")
+        # Scan for QTLs 
+        # TODO: use addcovar, and permutations
+        if(self.control.replace(" ", "") != ""):
+            covar = self.create_covariatesShort(cross_object)
+            result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar)
+        else:
+            result_data_frame = scanone(cross_object, pheno = "the_pheno")
+        qtl_results = self.process_rqtl_results(result_data_frame)
+
+        return qtl_results
+
+    def add_phenotype(self, cross, pheno_as_string):
+        ro.globalenv["the_cross"] = cross
+        ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
+        return ro.r["the_cross"]
+
+
+    def create_covariatesShort(self, cross):
+        ro.globalenv["the_cross"] = cross
+        ro.r('genotypes <- pull.geno(the_cross)')       # Get genotype matrix
+        userinputS = self.control.replace(" ", "").split(",")     # TODO sanitize user input !!! never trust a user
+        covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) 
+        print(covariate_names)
+        ro.r('covariates <- genotypes[,c(' + covariate_names + ')]')         # get covariate matrix, 
+        print("COVARIATES:", ro.r["covariates"])
+        return ro.r["covariates"]
+
+
+    def create_covariates(self):
+        if self.control != "":
+            control_markers = self.control.split(",")
+            control_string = ""
+            for i, control in enumerate(control_markers):
+                control_trait = GeneralTrait(name=str(control), dataset_name=str(self.dataset.group.name + "Geno"))
+                control_vals = []
+                for sample in self.dataset.group.samplelist:
+                    if sample in control_trait.data:
+                        control_vals.append(control_trait.data[sample].value)
+                    else:
+                        control_vals.append("x")
+                print("control_vals:", control_vals)
+                control_as_string = "c("
+                for j, val2 in enumerate(control_vals):
+                    if val2 == "x":
+                        if j < (len(control_vals) - 1):
+                            control_as_string +=  "NA,"
+                        else:
+                            control_as_string += "NA"
+                    else:
+                        if j < (len(control_vals) - 1):
+                            control_as_string += str(val2) + ","
+                        else:
+                            control_as_string += str(val2)
+                    #if i < (len(control_vals) - 1):
+                    #    control_as_string += str(new_val2) + ","
+                    #else:
+                    #    control_as_string += str(new_val2)
+                control_as_string += ")"
+                print("control_as_string:", control_as_string)
+                if i < (len(control_markers)-1):
+                    control_string += control_as_string + ","
+                else:
+                    control_string += control_as_string
+                    
+            robjects.r('covariates <- cbind( '+ control_string +')')
+
+
+    def sanitize_rqtl_phenotype(self):
+        pheno_as_string = "c("
+        null_pos = []
+        for i, val in enumerate(self.vals):
+            if val == "x":
+                null_pos.append(i)
+                if i < (len(self.vals) - 1):
+                    pheno_as_string +=  "NA,"
+                else:
+                    pheno_as_string += "NA"
+            else:
+                if i < (len(self.vals) - 1):
+                    pheno_as_string += str(val) + ","
+                else:
+                    pheno_as_string += str(val)
+        pheno_as_string += ")"
+        return pheno_as_string
+
+    def process_rqtl_results(self, result):
+        #TODO how to make this a one liner and not copy the stuff in a loop
+        qtl_results = []
+        
+        output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+        print("output", output)
+        
+        for i, line in enumerate(result.iter_row()):
+            marker = {}
+            marker['name'] = result.rownames[i]
+            marker['chr'] = output[i][0]
+            marker['Mb'] = output[i][1]
+            marker['lod_score'] = output[i][2]
+            
+            qtl_results.append(marker)
+            
+        return qtl_results
+
+    def run_rqtl_geno_old(self):
         robjects.packages.importr("qtl")
         robjects.r('the_cross <- read.cross(format="csvr", dir="/home/zas1024/PLINK2RQTL/test", file="BXD.csvr")')
         if self.manhattan_plot:
@@ -236,14 +367,6 @@ class MarkerRegression(object):
         else:
             robjects.r('the_cross <- calc.genoprob(the_cross, step=1, stepwidth="max")')
         pheno_as_string = "c("
-        #for i, val in enumerate(self.vals):
-        #    if val == "x":
-        #        new_val == "NULL"
-        #    else:
-        #        new_val = val
-        #    if i < (len(self.vals) - 1):
-        #        pheno_as_string += str(new_val) + ","
-        #    else: pheno_as_string += str(new_val)
         null_pos = []
         for i, val in enumerate(self.vals):
             if val == "x":
@@ -300,14 +423,19 @@ class MarkerRegression(object):
                     control_string += control_as_string
                     
             robjects.r('covariates <- cbind( '+ control_string +')')
-            
-            r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+', addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
+            #Scan for QTL
+            r_string = 'scanone(the_cross, pheno.col="the_pheno", addcovar=covariates)'
+            #r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+', addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
             print("r_string:", r_string)
-            
+
+            # extract the profile
+
             if int(self.num_perm) > 0:
+	         # Do permutation
+                r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
+                # Extract the tresholds
                 thresholds = robjects.r(r_string)
                 self.suggestive, self.significant = self.process_rqtl_perm_results(thresholds)
-                r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
                 
             #r_string = 'scanone(the_cross, pheno.col='+pheno_as_string+', addcovar='+control_as_string+')'
             
@@ -323,6 +451,8 @@ class MarkerRegression(object):
         result_data_frame = robjects.r(r_string)
         #print("results:", result_data_frame)
 
+        print("before process rqtl results")
+
         qtl_results = self.process_rqtl_results(result_data_frame)
 
         return qtl_results
@@ -339,24 +469,6 @@ class MarkerRegression(object):
         
         return self.suggestive, self.significant
             
-    
-    def process_rqtl_results(self, result):
-        qtl_results = []
-        
-        output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
-        print("output", output)
-        
-        
-        for i, line in enumerate(result.iter_row()):
-            marker = {}
-            marker['name'] = result.rownames[i]
-            marker['chr'] = output[i][0]
-            marker['Mb'] = output[i][1]
-            marker['lod_score'] = output[i][2]
-            
-            qtl_results.append(marker)
-            
-        return qtl_results
 
     def run_plink(self):
     
@@ -394,8 +506,8 @@ class MarkerRegression(object):
         self.dataset.group.markers.add_pvalues(p_values)
 
         return self.dataset.group.markers.markers
-    
-    
+   
+
     def gen_pheno_txt_file_plink(self, pheno_filename = ''):
         ped_sample_list = self.get_samples_from_ped_file()	
         output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")
author	Zachary Sloan	2015-03-17 18:08:44 +0000
committer	Zachary Sloan	2015-03-17 18:08:44 +0000
commit	85ffbfeb6379120c25b0bf589d568bb1453f778b (patch)
tree	7e915b6694ffc4facf4845772d3e4748303779f1
parent	b4ce1da2b8a376472ed87b80b97ef7ce1b3d9314 (diff)
download	genenetwork2-85ffbfeb6379120c25b0bf589d568bb1453f778b.tar.gz