diff options
author | zsloan | 2020-05-06 13:22:41 -0500 |
---|---|---|
committer | zsloan | 2020-05-06 13:22:41 -0500 |
commit | 8a2f61a999e6dc653a9ca2de802137a1a4107e35 (patch) | |
tree | e5cb1cdb12fa0c676c1f9d4c8ecb26952f8a7cb1 | |
parent | dc8254552a38201c968b92e88c4d243209dce108 (diff) | |
download | genenetwork2-itp_covariate_testing.tar.gz |
Added function for getting trait data type, plus added mapping scale changes to Danny's partially fixed R/qtl codeitp_covariate_testing
-rw-r--r-- | wqflask/wqflask/marker_regression/rqtl_mapping.py | 62 |
1 files changed, 37 insertions, 25 deletions
diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index ad3a4cb7..dd35d89a 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -1,6 +1,9 @@ import rpy2.robjects as ro import rpy2.robjects.numpy2ri as np2r import numpy as np +import json + +from flask import g from base.webqtlConfig import TMPDIR from base.trait import GeneralTrait @@ -11,7 +14,7 @@ from utility.tools import locate, TEMPDIR import utility.logger logger = utility.logger.getLogger(__name__ ) -def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): +def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): logger.info("Start run_rqtl_geno"); ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function @@ -36,7 +39,13 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe # genofilelocation = locate(crossname + ".RData", "genotype/rdata") # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function #except: - generate_cross_from_geno(dataset) + + if mapping_scale == "morgan": + scale_units = "cM" + else: + scale_units = "Mb" + + generate_cross_from_geno(dataset, scale_units) GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function crossfilelocation = TMPDIR + crossname + ".cross" if dataset.group.genofile: @@ -104,11 +113,9 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface - the_scale = check_mapping_scale(genofilelocation) - return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species), the_scale + return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) else: - the_scale = check_mapping_scale(genofilelocation) - return process_rqtl_results(result_data_frame, dataset.group.species), the_scale + return process_rqtl_results(result_data_frame, dataset.group.species) def generate_cross_from_rdata(dataset): rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") @@ -120,7 +127,7 @@ def generate_cross_from_rdata(dataset): } """ % (rdata_location)) -def generate_cross_from_geno(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -135,7 +142,7 @@ def generate_cross_from_geno(dataset): # TODO: Need to figure out why som if(type == '4-way'){ genocodes <- c('1','2','3','4') } else { - genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes + genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes } genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') @@ -143,14 +150,23 @@ def generate_cross_from_geno(dataset): # TODO: Need to figure out why som if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype c('sex', '', '', sex), # Sex phenotype for the mice - cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes + cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)])) # Genotypes write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file require(qtl) - cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross - if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl + if(type == '4-way'){ + cat('Loading in as 4-WAY\n') + cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE) # Load the created cross file using R/qtl read.cross + }else{ + cat('Loading in as normal\n') + cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross + } + if(type == 'riset'){ + cat('Converting to RISELF\n') + cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl + } return(cross) } - """ % (dataset.group.genofile)) + """ % (dataset.group.genofile, scale_units)) def add_perm_strata(cross, perm_strata): col_string = 'c("the_strata")' @@ -324,18 +340,14 @@ def process_rqtl_results(result, species_name): # TODO: how to make this return qtl_results -def check_mapping_scale(genofile_location): - scale = "physic" - with open(genofile_location, "r") as geno_fh: - for line in geno_fh: - if line[0] == "@" or line[0] == "#": +def get_trait_data_type(trait_db_string): + # Get a trait's type (numeric, categorical, etc) from the DB + the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" + results_json = g.db.execute(the_query).fetchone() - if "@scale" in line: - scale = line.split(":")[1].strip() - break - else: - continue - else: - break + results_ob = json.loads(results_json[0]) - return scale
\ No newline at end of file + if trait_db_string in results_ob: + return results_ob[trait_db_string] + else: + return "numeric" |