diff options
author | zsloan | 2016-10-03 16:46:47 +0000 |
---|---|---|
committer | zsloan | 2016-10-03 16:46:47 +0000 |
commit | 5fd49a1e036b02eaa82bdb8ce747a3c2cbc8c8de (patch) | |
tree | b86a4cb006dc986f89b4d2b320949b76c7f4bbf5 | |
parent | 54b709a86ce83a36d3a00c976f5262602284a8c5 (diff) | |
download | genenetwork2-5fd49a1e036b02eaa82bdb8ce747a3c2cbc8c8de.tar.gz |
Moved rqtl mapping code out of marker_regression and into its own file.
Later we will want to run all mapping methods outside of the web server, though
-rw-r--r-- | wqflask/wqflask/marker_regression/marker_regression.py | 191 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/rqtl_mapping.py | 190 |
2 files changed, 192 insertions, 189 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index d2b27991..bb964961 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -35,7 +35,7 @@ from utility import helper_functions from utility import Plot, Bunch from utility import temp_data from utility.benchmark import Bench -from wqflask.marker_regression import gemma_mapping +from wqflask.marker_regression import gemma_mapping, rqtl_mapping from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, PLINK_COMMAND, TEMPDIR from utility.external import shell @@ -166,7 +166,7 @@ class MarkerRegression(object): self.model = start_vars['mapmodel_rqtl_geno'] if start_vars['pair_scan'] == "true": self.pair_scan = True - results = self.run_rqtl_geno() + results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan) elif self.mapping_method == "reaper": if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: @@ -349,193 +349,6 @@ class MarkerRegression(object): count, p_values = self.parse_rqtl_output(plink_output_filename) - def geno_to_rqtl_function(self): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly - - ro.r(""" - trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } - - getGenoCode <- function(header, name = 'unk'){ - mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) - return(trim(strsplit(header[mat],':')[[1]][2])) - } - - GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ - header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines - toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers - - genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes - type <- getGenoCode(header, 'type') - genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') - cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') - if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one - if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males - outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype - c('sex', '', '', sex), # Sex phenotype for the mice - cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes - write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file - require(qtl) - cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross - if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl - return(cross) - } - """ % (self.dataset.group.name + ".geno")) - - def run_rqtl_geno(self): - self.geno_to_rqtl_function() - - ## Get pointers to some common R functions - r_library = ro.r["library"] # Map the library function - r_c = ro.r["c"] # Map the c function - r_sum = ro.r["sum"] # Map the sum function - plot = ro.r["plot"] # Map the plot function - postscript = ro.r["postscript"] # Map the postscript function - png = ro.r["png"] # Map the png function - dev_off = ro.r["dev.off"] # Map the device off function - - print(r_library("qtl")) # Load R/qtl - - ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function - read_cross = ro.r["read.cross"] # Map the read.cross function - write_cross = ro.r["write.cross"] # Map the write.cross function - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function - - crossname = self.dataset.group.name - genofilelocation = locate(crossname + ".geno", "genotype") - crossfilelocation = TMPDIR + crossname + ".cross" - - #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation) - - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available - - if self.manhattan_plot: - cross_object = calc_genoprob(cross_object) - else: - cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") - - cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype()) # Add the phenotype - - # for debug: write_cross(cross_object, "csvr", "test.csvr") - - # Scan for QTLs - covar = self.create_covariates(cross_object) # Create the additive covariate matrix - - if self.pair_scan: - if self.do_control == "true": # If sum(covar) > 0 we have a covariate matrix - print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=self.model, method=self.method, n_cluster = 16) - else: - print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=self.model, method=self.method, n_cluster = 16) - - #print("Pair scan results:", result_data_frame) - - self.pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=TEMPDIR+self.pair_scan_filename) - plot(result_data_frame) - dev_off() - - return self.process_pair_scan_results(result_data_frame) - - else: - if self.do_control == "true": - print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=self.model, method=self.method) - else: - print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=self.model, method=self.method) - - if self.num_perm > 0 and self.permCheck == "ON": # Do permutation (if requested by user) - if self.do_control == "true": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = self.num_perm, model=self.model, method=self.method) - else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = self.num_perm, model=self.model, method=self.method) - - self.process_rqtl_perm_results(perm_data_frame) # Functions that sets the thresholds for the webinterface - - return self.process_rqtl_results(result_data_frame) - - def add_phenotype(self, cross, pheno_as_string): - ro.globalenv["the_cross"] = cross - ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')') - return ro.r["the_cross"] - - def create_covariates(self, cross): - ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinputS = self.control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user - covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) - #print("Marker names of selected covariates:", covariate_names) - ro.r('covnames <- c(' + covariate_names + ')') - ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') - ro.r('covnames <- covnames[covInGeno]') - ro.r("cat('covnames (purged): ', covnames,'\n')") - ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file - #print("R/qtl matrix of covariates:", ro.r["covariates"]) - return ro.r["covariates"] - - def sanitize_rqtl_phenotype(self): - pheno_as_string = "c(" - for i, val in enumerate(self.vals): - if val == "x": - if i < (len(self.vals) - 1): - pheno_as_string += "NA," - else: - pheno_as_string += "NA" - else: - if i < (len(self.vals) - 1): - pheno_as_string += str(val) + "," - else: - pheno_as_string += str(val) - pheno_as_string += ")" - return pheno_as_string - - def process_pair_scan_results(self, result): - pair_scan_results = [] - - result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - #print("R/qtl scantwo output:", output) - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - marker['chr1'] = output[i][0] - marker['Mb'] = output[i][1] - marker['chr2'] = int(output[i][2]) - pair_scan_results.append(marker) - - #print("pair_scan_results:", pair_scan_results) - - return pair_scan_results - - def process_rqtl_results(self, result): # TODO: how to make this a one liner and not copy the stuff in a loop - qtl_results = [] - - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - #print("R/qtl scanone output:", output) - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - marker['chr'] = output[i][0] - marker['Mb'] = output[i][1] - marker['lod_score'] = output[i][2] - qtl_results.append(marker) - - return qtl_results - - def process_rqtl_perm_results(self, results): - perm_vals = [] - for line in str(results).split("\n")[1:(self.num_perm+1)]: - #print("R/qtl permutation line:", line.split()) - perm_vals.append(float(line.split()[1])) - - self.perm_output = perm_vals - self.suggestive = np.percentile(np.array(perm_vals), 67) - self.significant = np.percentile(np.array(perm_vals), 95) - - return self.suggestive, self.significant - - def run_plink(self): plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py new file mode 100644 index 00000000..53ea6053 --- /dev/null +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -0,0 +1,190 @@ +import rpy2.robjects as ro
+
+from base.webqtlConfig import TMPDIR
+from utility import webqtlUtil
+from utility.tools import locate, TEMPDIR
+
+def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
+ geno_to_rqtl_function(dataset)
+
+ ## Get pointers to some common R functions
+ r_library = ro.r["library"] # Map the library function
+ r_c = ro.r["c"] # Map the c function
+ r_sum = ro.r["sum"] # Map the sum function
+ plot = ro.r["plot"] # Map the plot function
+ postscript = ro.r["postscript"] # Map the postscript function
+ png = ro.r["png"] # Map the png function
+ dev_off = ro.r["dev.off"] # Map the device off function
+
+ print(r_library("qtl")) # Load R/qtl
+
+ ## Get pointers to some R/qtl functions
+ scanone = ro.r["scanone"] # Map the scanone function
+ scantwo = ro.r["scantwo"] # Map the scantwo function
+ calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
+ read_cross = ro.r["read.cross"] # Map the read.cross function
+ write_cross = ro.r["write.cross"] # Map the write.cross function
+ GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
+
+ crossname = dataset.group.name
+ genofilelocation = locate(crossname + ".geno", "genotype")
+ crossfilelocation = TMPDIR + crossname + ".cross"
+
+ #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
+
+ cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available
+
+ if manhattan_plot:
+ cross_object = calc_genoprob(cross_object)
+ else:
+ cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
+
+ cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype
+
+ # for debug: write_cross(cross_object, "csvr", "test.csvr")
+
+ # Scan for QTLs
+ covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix
+
+ if pair_scan:
+ if do_control == "true": # If sum(covar) > 0 we have a covariate matrix
+ print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16)
+ else:
+ print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
+
+ #print("Pair scan results:", result_data_frame)
+
+ pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
+ png(file=TEMPDIR+pair_scan_filename)
+ plot(result_data_frame)
+ dev_off()
+
+ return process_pair_scan_results(result_data_frame)
+ else:
+ if do_control == "true":
+ print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method)
+ else:
+ print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
+
+ if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user)
+ if do_control == "true":
+ perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method)
+ else:
+ perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
+
+ process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface
+
+ return process_rqtl_results(result_data_frame)
+
+def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+
+ ro.r("""
+ trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
+
+ getGenoCode <- function(header, name = 'unk'){
+ mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
+ return(trim(strsplit(header[mat],':')[[1]][2]))
+ }
+
+ GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+ header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines
+ toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers
+
+ genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes
+ type <- getGenoCode(header, 'type')
+ genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
+ cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
+ if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one
+ if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males
+ outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype
+ c('sex', '', '', sex), # Sex phenotype for the mice
+ cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes
+ write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file
+ require(qtl)
+ cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross
+ if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl
+ return(cross)
+ }
+ """ % (dataset.group.name + ".geno"))
+
+def add_phenotype(cross, pheno_as_string):
+ ro.globalenv["the_cross"] = cross
+ ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
+ return ro.r["the_cross"]
+
+def create_covariates(control_marker, cross):
+ ro.globalenv["the_cross"] = cross
+ ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix
+ userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user
+ covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
+ #print("Marker names of selected covariates:", covariate_names)
+ ro.r('covnames <- c(' + covariate_names + ')')
+ ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
+ ro.r('covnames <- covnames[covInGeno]')
+ ro.r("cat('covnames (purged): ', covnames,'\n')")
+ ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file
+ #print("R/qtl matrix of covariates:", ro.r["covariates"])
+ return ro.r["covariates"]
+
+def sanitize_rqtl_phenotype(vals):
+ pheno_as_string = "c("
+ for i, val in enumerate(vals):
+ if val == "x":
+ if i < (len(vals) - 1):
+ pheno_as_string += "NA,"
+ else:
+ pheno_as_string += "NA"
+ else:
+ if i < (len(vals) - 1):
+ pheno_as_string += str(val) + ","
+ else:
+ pheno_as_string += str(val)
+ pheno_as_string += ")"
+ return pheno_as_string
+
+def process_pair_scan_results(result):
+ pair_scan_results = []
+
+ result = result[1]
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ #print("R/qtl scantwo output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr1'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['chr2'] = int(output[i][2])
+ pair_scan_results.append(marker)
+
+ #print("pair_scan_results:", pair_scan_results)
+
+ return pair_scan_results
+
+def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop
+ qtl_results = []
+
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ #print("R/qtl scanone output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['lod_score'] = output[i][2]
+ qtl_results.append(marker)
+
+ return qtl_results
+
+def process_rqtl_perm_results(num_perm, results):
+ perm_vals = []
+ for line in str(results).split("\n")[1:(num_perm+1)]:
+ #print("R/qtl permutation line:", line.split())
+ perm_vals.append(float(line.split()[1]))
+
+ perm_output = perm_vals
+ suggestive = np.percentile(np.array(perm_vals), 67)
+ significant = np.percentile(np.array(perm_vals), 95)
+
+ return perm_output, suggestive, significant
\ No newline at end of file |