diff options
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 97 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 3 |
2 files changed, 62 insertions, 38 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 140da0c5..60bc721e 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -228,33 +228,64 @@ class MarkerRegression(object): os.system(rqtl_command) count, p_values = self.parse_rqtl_output(plink_output_filename) + + def geno_to_rqtl_function(self): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + print("Adding a function to the R environment") + ro.r(""" + getGenoCode <- function(header, name = 'unk'){ + mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) + return(strsplit(header[mat],'')[[1]][6]) + } + + GENOtoCSVR <- function(genotypes = 'BXD.geno', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ + header = readLines(genotypes, 40) + toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers + genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes + + genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') + cat('Genodata:', toskip, " ", dim(genodata), '\n') + if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one + if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males + outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype + c('sex', '', '', sex), # Sex phenotype for the mice + cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes + write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file + require(qtl) + return(read.cross(file=out, 'csvr', genotypes=genocodes)) # Load it using R/qtl read.cross + } + """) def run_rqtl_geno(self): - print("Calling R/qtl from python") + self.geno_to_rqtl_function() + ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_c = ro.r["c"] # Map the c function - + print(r_library("qtl")) # Load R/qtl - + ## Get pointers to some R/qtl functions scanone = ro.r["scanone"] # Map the scanone function calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function read_cross = ro.r["read.cross"] # Map the read.cross function write_cross = ro.r["write.cross"] # Map the write.cross function + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the GENOtoCSVR function + + genofilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".geno" + crossfilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".cross" + print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation) - cross_object = read_cross(file = "BXD.csvr", format = "csvr", dir="/home/zas1024/PLINK2RQTL/test", genotypes = r_c("B","H","D")) + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available if self.manhattan_plot: cross_object = calc_genoprob(cross_object) else: cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") - - # Add the phenotype - cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype()) + + cross_object = self.add_phenotype(cross_object, self.sanitize_rqtl_phenotype()) # Add the phenotype # for debug: write_cross(cross_object, "csvr", "test.csvr") @@ -265,42 +296,36 @@ class MarkerRegression(object): else: result_data_frame = scanone(cross_object, pheno = "the_pheno") - if int(self.num_perm) > 0: - # Do permutation + if int(self.num_perm) > 0: # Do permutation (if requested by user) if(self.control.replace(" ", "") != ""): - covar = self.create_covariatesShort(cross_object) - perm_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, n_perm=int(self.num_perm)) + covar = self.create_covariates(cross_object) + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm=int(self.num_perm)) else: - perm_data_frame = scanone(cross_object, pheno = "the_pheno", n_perm=int(self.num_perm)) - - self.suggestive, self.significant = self.process_rqtl_perm_results(perm_data_frame) + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm=int(self.num_perm)) - qtl_results = self.process_rqtl_results(result_data_frame) + self.process_rqtl_perm_results(perm_data_frame) # Functions that sets the thresholds for the webinterface - return qtl_results + return self.process_rqtl_results(result_data_frame) def add_phenotype(self, cross, pheno_as_string): ro.globalenv["the_cross"] = cross ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')') return ro.r["the_cross"] - def create_covariates(self, cross): ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get genotype matrix - userinputS = self.control.replace(" ", "").split(",") # TODO sanitize user input !!! never trust a user - covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) - print(covariate_names) - ro.r('covariates <- genotypes[,c(' + covariate_names + ')]') # get covariate matrix, - print("COVARIATES:", ro.r["covariates"]) + ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix + userinputS = self.control.replace(" ", "").split(",") # TODO sanitize user input, Never Ever trust a user + covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) + print("Marker names of selected covariates:", covariate_names) + ro.r('covariates <- genotypes[,c(' + covariate_names + ')]') # Get the covariate matrix by using the marker name as index to the genotype file + print("R/qtl matrix of covariates:", ro.r["covariates"]) return ro.r["covariates"] def sanitize_rqtl_phenotype(self): pheno_as_string = "c(" - null_pos = [] for i, val in enumerate(self.vals): if val == "x": - null_pos.append(i) if i < (len(self.vals) - 1): pheno_as_string += "NA," else: @@ -313,35 +338,33 @@ class MarkerRegression(object): pheno_as_string += ")" return pheno_as_string - def process_rqtl_results(self, result): - #TODO how to make this a one liner and not copy the stuff in a loop + def process_rqtl_results(self, result): # TODO: how to make this a one liner and not copy the stuff in a loop qtl_results = [] - + output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - print("output", output) - + print("R/qtl scanone output:", output) + for i, line in enumerate(result.iter_row()): marker = {} marker['name'] = result.rownames[i] marker['chr'] = output[i][0] marker['Mb'] = output[i][1] marker['lod_score'] = output[i][2] - qtl_results.append(marker) - + return qtl_results - + def process_rqtl_perm_results(self, results): perm_vals = [] for line in str(results).split("\n")[1:(int(self.num_perm)+1)]: - print("line:", line.split()) + print("R/qtl permutation line:", line.split()) perm_vals.append(float(line.split()[1])) - + self.suggestive = np.percentile(np.array(perm_vals), 67) self.significant = np.percentile(np.array(perm_vals), 95) - + return self.suggestive, self.significant - + def run_plink(self): diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 99a5a940..a9744e72 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -733,12 +733,13 @@ def gn2_redis(key,species): tempdata = temp_data.TempData(params['temp_uuid']) - print('kinship', np.array(params['kinship_matrix'])) + print('pheno', np.array(params['pheno_vector'])) # sys.exit(1) if species == "human" : + print('kinship', np.array(params['kinship_matrix'])) ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), covariate_matrix = np.array(params['covariate_matrix']), plink_input_file = params['input_file_name'], |