aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZachary Sloan2015-03-17 18:08:44 +0000
committerZachary Sloan2015-03-17 18:08:44 +0000
commit85ffbfeb6379120c25b0bf589d568bb1453f778b (patch)
tree7e915b6694ffc4facf4845772d3e4748303779f1
parentb4ce1da2b8a376472ed87b80b97ef7ce1b3d9314 (diff)
downloadgenenetwork2-85ffbfeb6379120c25b0bf589d568bb1453f778b.tar.gz
Improved rqtl code
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py184
1 files changed, 148 insertions, 36 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index fa680f5f..99f23676 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -13,7 +13,7 @@ import os
import collections
import uuid
-import rpy2.robjects as robjects
+import rpy2.robjects as ro
import numpy as np
from scipy import linalg
@@ -79,7 +79,7 @@ class MarkerRegression(object):
self.num_perm = start_vars['num_perm']
self.control = start_vars['control_marker']
- print("doing rqtl_geno")
+ print("DOING RQTL GENO")
qtl_results = self.run_rqtl_geno()
print("qtl_results:", qtl_results)
elif self.mapping_method == "plink":
@@ -88,8 +88,9 @@ class MarkerRegression(object):
elif self.mapping_method == "pylmm":
print("RUNNING PYLMM")
self.num_perm = start_vars['num_perm']
- if int(self.num_perm) > 0:
- self.run_permutations(str(temp_uuid))
+ if self.num_perm != "":
+ if int(self.num_perm) > 0:
+ self.run_permutations(str(temp_uuid))
qtl_results = self.gen_data(str(temp_uuid))
else:
print("RUNNING NOTHING")
@@ -229,6 +230,136 @@ class MarkerRegression(object):
count, p_values = self.parse_rqtl_output(plink_output_filename)
def run_rqtl_geno(self):
+
+ print("CORRECT FUNCTION BEING CALLED")
+
+ ## Get pointers to some common R functions
+ r_library = ro.r["library"] # Map the library function
+ r_c = ro.r["c"] # Map the c function
+
+ print(r_library("qtl")) # Load R/qtl
+
+ ## Get pointers to some R/qtl functions
+ scanone = ro.r["scanone"] # Map the scanone function
+ calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
+ read_cross = ro.r["read.cross"] # Map the read.cross function
+ write_cross = ro.r["write.cross"] # Map the write.cross function
+
+
+ cross_object = read_cross(file = "BXD.csvr", format = "csvr", dir="/home/zas1024/PLINK2RQTL/test", genotypes = r_c("B","H","D"))
+
+ if self.manhattan_plot:
+ cross_object = calc_genoprob(cross_object)
+ else:
+ cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
+
+ # Add the phenotype
+ cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype())
+ # for debug: write_cross(cross_object, "csvr", "test.csvr")
+ # Scan for QTLs
+ # TODO: use addcovar, and permutations
+ if(self.control.replace(" ", "") != ""):
+ covar = self.create_covariatesShort(cross_object)
+ result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar)
+ else:
+ result_data_frame = scanone(cross_object, pheno = "the_pheno")
+ qtl_results = self.process_rqtl_results(result_data_frame)
+
+ return qtl_results
+
+ def add_phenotype(self, cross, pheno_as_string):
+ ro.globalenv["the_cross"] = cross
+ ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
+ return ro.r["the_cross"]
+
+
+ def create_covariatesShort(self, cross):
+ ro.globalenv["the_cross"] = cross
+ ro.r('genotypes <- pull.geno(the_cross)') # Get genotype matrix
+ userinputS = self.control.replace(" ", "").split(",") # TODO sanitize user input !!! never trust a user
+ covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
+ print(covariate_names)
+ ro.r('covariates <- genotypes[,c(' + covariate_names + ')]') # get covariate matrix,
+ print("COVARIATES:", ro.r["covariates"])
+ return ro.r["covariates"]
+
+
+ def create_covariates(self):
+ if self.control != "":
+ control_markers = self.control.split(",")
+ control_string = ""
+ for i, control in enumerate(control_markers):
+ control_trait = GeneralTrait(name=str(control), dataset_name=str(self.dataset.group.name + "Geno"))
+ control_vals = []
+ for sample in self.dataset.group.samplelist:
+ if sample in control_trait.data:
+ control_vals.append(control_trait.data[sample].value)
+ else:
+ control_vals.append("x")
+ print("control_vals:", control_vals)
+ control_as_string = "c("
+ for j, val2 in enumerate(control_vals):
+ if val2 == "x":
+ if j < (len(control_vals) - 1):
+ control_as_string += "NA,"
+ else:
+ control_as_string += "NA"
+ else:
+ if j < (len(control_vals) - 1):
+ control_as_string += str(val2) + ","
+ else:
+ control_as_string += str(val2)
+ #if i < (len(control_vals) - 1):
+ # control_as_string += str(new_val2) + ","
+ #else:
+ # control_as_string += str(new_val2)
+ control_as_string += ")"
+ print("control_as_string:", control_as_string)
+ if i < (len(control_markers)-1):
+ control_string += control_as_string + ","
+ else:
+ control_string += control_as_string
+
+ robjects.r('covariates <- cbind( '+ control_string +')')
+
+
+ def sanitize_rqtl_phenotype(self):
+ pheno_as_string = "c("
+ null_pos = []
+ for i, val in enumerate(self.vals):
+ if val == "x":
+ null_pos.append(i)
+ if i < (len(self.vals) - 1):
+ pheno_as_string += "NA,"
+ else:
+ pheno_as_string += "NA"
+ else:
+ if i < (len(self.vals) - 1):
+ pheno_as_string += str(val) + ","
+ else:
+ pheno_as_string += str(val)
+ pheno_as_string += ")"
+ return pheno_as_string
+
+ def process_rqtl_results(self, result):
+ #TODO how to make this a one liner and not copy the stuff in a loop
+ qtl_results = []
+
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ print("output", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['lod_score'] = output[i][2]
+
+ qtl_results.append(marker)
+
+ return qtl_results
+
+ def run_rqtl_geno_old(self):
robjects.packages.importr("qtl")
robjects.r('the_cross <- read.cross(format="csvr", dir="/home/zas1024/PLINK2RQTL/test", file="BXD.csvr")')
if self.manhattan_plot:
@@ -236,14 +367,6 @@ class MarkerRegression(object):
else:
robjects.r('the_cross <- calc.genoprob(the_cross, step=1, stepwidth="max")')
pheno_as_string = "c("
- #for i, val in enumerate(self.vals):
- # if val == "x":
- # new_val == "NULL"
- # else:
- # new_val = val
- # if i < (len(self.vals) - 1):
- # pheno_as_string += str(new_val) + ","
- # else: pheno_as_string += str(new_val)
null_pos = []
for i, val in enumerate(self.vals):
if val == "x":
@@ -300,14 +423,19 @@ class MarkerRegression(object):
control_string += control_as_string
robjects.r('covariates <- cbind( '+ control_string +')')
-
- r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+', addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
+ #Scan for QTL
+ r_string = 'scanone(the_cross, pheno.col="the_pheno", addcovar=covariates)'
+ #r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+', addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
print("r_string:", r_string)
-
+
+ # extract the profile
+
if int(self.num_perm) > 0:
+ # Do permutation
+ r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
+ # Extract the tresholds
thresholds = robjects.r(r_string)
self.suggestive, self.significant = self.process_rqtl_perm_results(thresholds)
- r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
#r_string = 'scanone(the_cross, pheno.col='+pheno_as_string+', addcovar='+control_as_string+')'
@@ -323,6 +451,8 @@ class MarkerRegression(object):
result_data_frame = robjects.r(r_string)
#print("results:", result_data_frame)
+ print("before process rqtl results")
+
qtl_results = self.process_rqtl_results(result_data_frame)
return qtl_results
@@ -339,24 +469,6 @@ class MarkerRegression(object):
return self.suggestive, self.significant
-
- def process_rqtl_results(self, result):
- qtl_results = []
-
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- print("output", output)
-
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['lod_score'] = output[i][2]
-
- qtl_results.append(marker)
-
- return qtl_results
def run_plink(self):
@@ -394,8 +506,8 @@ class MarkerRegression(object):
self.dataset.group.markers.add_pvalues(p_values)
return self.dataset.group.markers.markers
-
-
+
+
def gen_pheno_txt_file_plink(self, pheno_filename = ''):
ped_sample_list = self.get_samples_from_ped_file()
output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")