diff options
| author | zsloan | 2020-05-04 13:42:48 -0500 | 
|---|---|---|
| committer | zsloan | 2020-05-04 13:42:48 -0500 | 
| commit | a1db50c2a09e9666b18aeada21785c86e715b865 (patch) | |
| tree | 212e2961b1ae1c3f4fbbecec7debae8279f3ae26 | |
| parent | 5e23e579b731fd150d01511c622ffb6fd9dafdbc (diff) | |
| download | genenetwork2-a1db50c2a09e9666b18aeada21785c86e715b865.tar.gz | |
Committing some changes for testing covariates
| -rw-r--r-- | wqflask/wqflask/marker_regression/rqtl_mapping.py | 624 | ||||
| -rw-r--r-- | wqflask/wqflask/marker_regression/run_mapping.py | 8 | ||||
| -rw-r--r-- | wqflask/wqflask/show_trait/show_trait.py | 7 | 
3 files changed, 322 insertions, 317 deletions
| diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 8c294460..ed63ad92 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -1,311 +1,313 @@ -import rpy2.robjects as ro -import rpy2.robjects.numpy2ri as np2r -import numpy as np - -from base.webqtlConfig import TMPDIR -from base.trait import GeneralTrait -from base.data_set import create_dataset -from utility import webqtlUtil -from utility.tools import locate, TEMPDIR - -import utility.logger -logger = utility.logger.getLogger(__name__ ) - -def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): - ## Get pointers to some common R functions - r_library = ro.r["library"] # Map the library function - r_c = ro.r["c"] # Map the c function - plot = ro.r["plot"] # Map the plot function - png = ro.r["png"] # Map the png function - dev_off = ro.r["dev.off"] # Map the device off function - - print(r_library("qtl")) # Load R/qtl - - ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function - - crossname = dataset.group.name - #try: - # generate_cross_from_rdata(dataset) - # read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function - # genofilelocation = locate(crossname + ".RData", "genotype/rdata") - # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function - #except: - - if mapping_scale == "morgan": - scale_units = "cM" - else: - scale_units = "Mb" - - generate_cross_from_geno(dataset, scale_units) - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function - crossfilelocation = TMPDIR + crossname + ".cross" - if dataset.group.genofile: - genofilelocation = locate(dataset.group.genofile, "genotype") - else: - genofilelocation = locate(dataset.group.name + ".geno", "genotype") - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available - - if manhattan_plot: - cross_object = calc_genoprob(cross_object) - else: - cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") - - logger.debug("VAL LEN:", len(vals)) - - pheno_string = sanitize_rqtl_phenotype(vals) - - cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype - - # Scan for QTLs - marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers - - if cofactors != "": - cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples) # Create the covariates from selected traits - ro.r('all_covars <- cbind(marker_covars, trait_covars)') - else: - ro.r('all_covars <- marker_covars') - - covars = ro.r['all_covars'] - - if pair_scan: - if do_control == "true": - logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16) - else: - logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) - - pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=TEMPDIR+pair_scan_filename) - plot(result_data_frame) - dev_off() - - return process_pair_scan_results(result_data_frame) - else: - if do_control == "true" or cofactors != "": - logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) - else: - logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) - - if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) - if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping - cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list) - if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method) - else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method) - else: - if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method) - else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) - - perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface - return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) - else: - return process_rqtl_results(result_data_frame, dataset.group.species) - -def generate_cross_from_rdata(dataset): - rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") - ro.r(""" - generate_cross_from_rdata <- function(filename = '%s') { - load(file=filename) - cross = cunique - return(cross) - } - """ % (rdata_location)) - -def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly - - ro.r(""" - trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } - - getGenoCode <- function(header, name = 'unk'){ - mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) - return(trim(strsplit(header[mat],':')[[1]][2])) - } - - GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ - header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines - toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers - - type <- getGenoCode(header, 'type') - if(type == '4-way'){ - genocodes <- c('1','2','3','4') - genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#', crosstype="4way") - } else { - genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes - genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') - } - cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') - if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one - if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males - outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype - c('sex', '', '', sex), # Sex phenotype for the mice - cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)])) # Genotypes - write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file - require(qtl) - if(type == '4-way'){ - cat('Loading in as 4-WAY\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes) - #cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE) # Load the created cross file using R/qtl read.cross - }else{ - cat('Loading in as normal\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross - } - if(type == 'riset'){ - cat('Converting to RISELF\n') - cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl - } - return(cross) - } - """ % (dataset.group.genofile, scale_units)) - -def add_perm_strata(cross, perm_strata): - col_string = 'c("the_strata")' - perm_strata_string = "c(" - for item in perm_strata: - perm_strata_string += str(item) + "," - - perm_strata_string = perm_strata_string[:-1] + ")" - - cross = add_phenotype(cross, perm_strata_string, "the_strata") - - strata_ob = pull_var("perm_strata", cross, col_string) - - return cross, strata_ob - -def sanitize_rqtl_phenotype(vals): - pheno_as_string = "c(" - for i, val in enumerate(vals): - if val == "x": - if i < (len(vals) - 1): - pheno_as_string += "NA," - else: - pheno_as_string += "NA" - else: - if i < (len(vals) - 1): - pheno_as_string += str(val) + "," - else: - pheno_as_string += str(val) - pheno_as_string += ")" - - return pheno_as_string - -def add_phenotype(cross, pheno_as_string, col_name): - ro.globalenv["the_cross"] = cross - ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), ' + col_name + ' = '+ pheno_as_string +')') - return ro.r["the_cross"] - -def pull_var(var_name, cross, var_string): - ro.globalenv["the_cross"] = cross - ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')') - - return ro.r[var_name] - -def add_cofactors(cross, this_dataset, covariates, samples): - ro.numpy2ri.activate() - - covariate_list = covariates.split(",") - covar_name_string = "c(" - for i, covariate in enumerate(covariate_list): - this_covar_data = [] - covar_as_string = "c(" - trait_name = covariate.split(":")[0] - dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = GeneralTrait(dataset=dataset_ob, - name=trait_name, - cellid=None) - - this_dataset.group.get_samplelist() - trait_samples = this_dataset.group.samplelist - trait_sample_data = trait_ob.data - for index, sample in enumerate(trait_samples): - if sample in samples: - if sample in trait_sample_data: - sample_value = trait_sample_data[sample].value - this_covar_data.append(sample_value) - else: - this_covar_data.append("NA") - - for j, item in enumerate(this_covar_data): - if j < (len(this_covar_data) - 1): - covar_as_string += str(item) + "," - else: - covar_as_string += str(item) - - covar_as_string += ")" - - col_name = "covar_" + str(i) - cross = add_phenotype(cross, covar_as_string, col_name) - - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - covar_name_string += '"' + col_name + '"' - - covar_name_string += ")" - - covars_ob = pull_var("trait_covars", cross, covar_name_string) - - return cross, covars_ob - -def create_marker_covariates(control_marker, cross): - ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user - covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) - ro.r('covnames <- c(' + covariate_names + ')') - ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') - ro.r('covnames <- covnames[covInGeno]') - ro.r("cat('covnames (purged): ', covnames,'\n')") - ro.r('marker_covars <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file - - return ro.r["marker_covars"] - -def process_pair_scan_results(result): - pair_scan_results = [] - - result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - marker['chr1'] = output[i][0] - marker['Mb'] = output[i][1] - marker['chr2'] = int(output[i][2]) - pair_scan_results.append(marker) - - return pair_scan_results - -def process_rqtl_perm_results(num_perm, results): - perm_vals = [] - for line in str(results).split("\n")[1:(num_perm+1)]: - #print("R/qtl permutation line:", line.split()) - perm_vals.append(float(line.split()[1])) - - perm_output = perm_vals - suggestive = np.percentile(np.array(perm_vals), 67) - significant = np.percentile(np.array(perm_vals), 95) - - return perm_output, suggestive, significant - -def process_rqtl_results(result, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop - qtl_results = [] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file - marker['chr'] = "X" - else: - marker['chr'] = output[i][0] - marker['cM'] = output[i][1] - marker['Mb'] = output[i][1] - marker['lod_score'] = output[i][2] - qtl_results.append(marker) - - return qtl_results \ No newline at end of file +import rpy2.robjects as ro +import rpy2.robjects.numpy2ri as np2r +import numpy as np + +from base.webqtlConfig import TMPDIR +from base.trait import GeneralTrait +from base.data_set import create_dataset +from utility import webqtlUtil +from utility.tools import locate, TEMPDIR + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): + ## Get pointers to some common R functions + r_library = ro.r["library"] # Map the library function + r_c = ro.r["c"] # Map the c function + plot = ro.r["plot"] # Map the plot function + png = ro.r["png"] # Map the png function + dev_off = ro.r["dev.off"] # Map the device off function + + print(r_library("qtl")) # Load R/qtl + + ## Get pointers to some R/qtl functions + scanone = ro.r["scanone"] # Map the scanone function + scantwo = ro.r["scantwo"] # Map the scantwo function + calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function + + crossname = dataset.group.name + #try: + # generate_cross_from_rdata(dataset) + # read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function + # genofilelocation = locate(crossname + ".RData", "genotype/rdata") + # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function + #except: + generate_cross_from_geno(dataset) + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + crossfilelocation = TMPDIR + crossname + ".cross" + if dataset.group.genofile: + genofilelocation = locate(dataset.group.genofile, "genotype") + else: + genofilelocation = locate(dataset.group.name + ".geno", "genotype") + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available + + the_version = ro.r["packageVersion('qtl')"] + logger.debug("THE R VERSION:", the_version) + + ro.r('save.image(file = "/home/zas1024/gn2-zach/tmp/HET3_cofactor_test2.RData")') + + if manhattan_plot: + cross_object = calc_genoprob(cross_object) + else: + cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") + + pheno_string = sanitize_rqtl_phenotype(vals) + + cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype + + # Scan for QTLs + marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers + + if cofactors != "": + cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples) # Create the covariates from selected traits + ro.r('all_covars <- cbind(marker_covars, trait_covars)') + else: + ro.r('all_covars <- marker_covars') + + covars = ro.r['all_covars'] + + if pair_scan: + if do_control == "true": + logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16) + else: + logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) + + pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" + png(file=TEMPDIR+pair_scan_filename) + plot(result_data_frame) + dev_off() + + return process_pair_scan_results(result_data_frame) + else: + if do_control == "true" or cofactors != "": + logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) + else: + logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) + + if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) + if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping + cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list) + if do_control == "true" or cofactors != "": + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method) + else: + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method) + else: + if do_control == "true" or cofactors != "": + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method) + else: + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) + + perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface + the_scale = check_mapping_scale(genofilelocation) + return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species), the_scale + else: + the_scale = check_mapping_scale(genofilelocation) + return process_rqtl_results(result_data_frame, dataset.group.species), the_scale + +def generate_cross_from_rdata(dataset): + rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") + ro.r(""" + generate_cross_from_rdata <- function(filename = '%s') { + load(file=filename) + cross = cunique + return(cross) + } + """ % (rdata_location)) + +def generate_cross_from_geno(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + + ro.r(""" + trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } + getGenoCode <- function(header, name = 'unk'){ + mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) + return(trim(strsplit(header[mat],':')[[1]][2])) + } + GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ + header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines + toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers + type <- getGenoCode(header, 'type') + if(type == '4-way'){ + genocodes <- c('1','2','3','4') + } else { + genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes + } + genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') + cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') + if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one + if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males + outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype + c('sex', '', '', sex), # Sex phenotype for the mice + cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes + write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file + require(qtl) + cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE) # Load the created cross file using R/qtl read.cross + #cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross + if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl + return(cross) + } + """ % (dataset.group.genofile)) + +def add_perm_strata(cross, perm_strata): + col_string = 'c("the_strata")' + perm_strata_string = "c(" + for item in perm_strata: + perm_strata_string += str(item) + "," + + perm_strata_string = perm_strata_string[:-1] + ")" + + cross = add_phenotype(cross, perm_strata_string, "the_strata") + + strata_ob = pull_var("perm_strata", cross, col_string) + + return cross, strata_ob + +def sanitize_rqtl_phenotype(vals): + pheno_as_string = "c(" + for i, val in enumerate(vals): + if val == "x": + if i < (len(vals) - 1): + pheno_as_string += "NA," + else: + pheno_as_string += "NA" + else: + if i < (len(vals) - 1): + pheno_as_string += str(val) + "," + else: + pheno_as_string += str(val) + pheno_as_string += ")" + + return pheno_as_string + +def add_phenotype(cross, pheno_as_string, col_name): + ro.globalenv["the_cross"] = cross + ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), ' + col_name + ' = '+ pheno_as_string +')') + return ro.r["the_cross"] + +def pull_var(var_name, cross, var_string): + ro.globalenv["the_cross"] = cross + ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')') + + return ro.r[var_name] + +def add_cofactors(cross, this_dataset, covariates, samples): + ro.numpy2ri.activate() + + covariate_list = covariates.split(",") + covar_name_string = "c(" + for i, covariate in enumerate(covariate_list): + this_covar_data = [] + covar_as_string = "c(" + trait_name = covariate.split(":")[0] + dataset_ob = create_dataset(covariate.split(":")[1]) + trait_ob = GeneralTrait(dataset=dataset_ob, + name=trait_name, + cellid=None) + + this_dataset.group.get_samplelist() + trait_samples = this_dataset.group.samplelist + trait_sample_data = trait_ob.data + for index, sample in enumerate(trait_samples): + if sample in samples: + if sample in trait_sample_data: + sample_value = trait_sample_data[sample].value + this_covar_data.append(sample_value) + else: + this_covar_data.append("NA") + + for j, item in enumerate(this_covar_data): + if j < (len(this_covar_data) - 1): + covar_as_string += str(item) + "," + else: + covar_as_string += str(item) + + covar_as_string += ")" + + col_name = "covar_" + str(i) + cross = add_phenotype(cross, covar_as_string, col_name) + + if i < (len(covariate_list) - 1): + covar_name_string += '"' + col_name + '", ' + else: + covar_name_string += '"' + col_name + '"' + + covar_name_string += ")" + + covars_ob = pull_var("trait_covars", cross, covar_name_string) + + return cross, covars_ob + +def create_marker_covariates(control_marker, cross): + ro.globalenv["the_cross"] = cross + ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix + userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) + ro.r('covnames <- c(' + covariate_names + ')') + ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') + ro.r('covnames <- covnames[covInGeno]') + ro.r("cat('covnames (purged): ', covnames,'\n')") + ro.r('marker_covars <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file + + return ro.r["marker_covars"] + +def process_pair_scan_results(result): + pair_scan_results = [] + + result = result[1] + output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + + for i, line in enumerate(result.iter_row()): + marker = {} + marker['name'] = result.rownames[i] + marker['chr1'] = output[i][0] + marker['Mb'] = output[i][1] + marker['chr2'] = int(output[i][2]) + pair_scan_results.append(marker) + + return pair_scan_results + +def process_rqtl_perm_results(num_perm, results): + perm_vals = [] + for line in str(results).split("\n")[1:(num_perm+1)]: + #print("R/qtl permutation line:", line.split()) + perm_vals.append(float(line.split()[1])) + + perm_output = perm_vals + suggestive = np.percentile(np.array(perm_vals), 67) + significant = np.percentile(np.array(perm_vals), 95) + + return perm_output, suggestive, significant + +def process_rqtl_results(result, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop + qtl_results = [] + output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + + for i, line in enumerate(result.iter_row()): + marker = {} + marker['name'] = result.rownames[i] + if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + marker['chr'] = "X" + else: + marker['chr'] = output[i][0] + marker['cM'] = output[i][1] + marker['Mb'] = output[i][1] + marker['lod_score'] = output[i][2] + qtl_results.append(marker) + + return qtl_results + +def check_mapping_scale(genofile_location): + scale = "physic" + with open(genofile_location, "r") as geno_fh: + for line in geno_fh: + if line[0] == "@" or line[0] == "#": + + if "@scale" in line: + scale = line.split(":")[1].strip() + break + else: + continue + else: + break + + return scale \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 5f7710ab..8fea295f 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -257,9 +257,13 @@ class RunMapping(object): #if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: - results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + # if self.permCheck and self.num_perm > 0: + # self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + # else: + # results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 40e344b8..f73079f8 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -175,10 +175,7 @@ class ShowTrait(object): if self.genofiles: self.scales_in_geno = get_genotype_scales(self.genofiles) else: - self.scales_in_geno = get_genotype_scales(self.dataset.group + ".geno") - - if len(self.scales_in_geno) < 2: - hddn['mapping_scale'] = self.scales_in_geno[self.scales_in_geno.keys()[0]][0] + self.scales_in_geno = get_genotype_scales(self.dataset.group.name + ".geno") else: self.scales_in_geno = {} @@ -255,6 +252,8 @@ class ShowTrait(object): hddn['compare_traits'] = [] hddn['export_data'] = "" hddn['export_format'] = "excel" + if len(self.scales_in_geno) < 2: + hddn['mapping_scale'] = self.scales_in_geno[self.scales_in_geno.keys()[0]][0] # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self self.hddn = hddn | 
