From 77e3974eebfc48e49af07e07c08cd312edd34b99 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 29 Apr 2020 16:34:58 -0500 Subject: Changed a lot about how mapping scale is read and gave the option to change mapping scale when both scales are available --- .../marker_regression/display_mapping_results.py | 25 +++--- wqflask/wqflask/marker_regression/rqtl_mapping.py | 48 +++++------- wqflask/wqflask/marker_regression/run_mapping.py | 18 +++-- wqflask/wqflask/show_trait/show_trait.py | 89 +++++++++++++++++++++- .../new/javascript/show_trait_mapping_tools.js | 19 +++++ wqflask/wqflask/templates/mapping_results.html | 4 + .../templates/show_trait_mapping_tools.html | 22 +++++- 7 files changed, 177 insertions(+), 48 deletions(-) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 2a53b60e..f70bc555 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -265,14 +265,12 @@ class DisplayMappingResults(object): else: self.colorCollection = [self.LRS_COLOR] + self.dataset.group.genofile = self.genofile_string.split(":")[0] if self.mapping_method == "reaper" and self.manhattan_plot != True: self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() - #if self.mapping_method == "rqtl_geno" and self.genotype.filler == True: - # self.genotype = self.genotype.read_rdata_output(self.qtlresults) - #Darwing Options try: if self.selectedChr > -1: @@ -1761,9 +1759,9 @@ class DisplayMappingResults(object): break if all_int: - max_lrs_width = canvas.stringWidth("%d" % LRS_LOD_Max, font=LRSScaleFont) + 30 + max_lrs_width = canvas.stringWidth("%d" % LRS_LOD_Max, font=LRSScaleFont) + 40 else: - max_lrs_width = canvas.stringWidth("%2.1f" % LRS_LOD_Max, font=LRSScaleFont) + 20 + max_lrs_width = canvas.stringWidth("%2.1f" % LRS_LOD_Max, font=LRSScaleFont) + 30 #draw the "LRS" or "LOD" string to the left of the axis canvas.drawString(self.LRS_LOD, xLeftOffset - max_lrs_width - 15*(zoom-1), \ @@ -1899,13 +1897,16 @@ class DisplayMappingResults(object): this_chr = str(self.ChrList[self.selectedChr][1]+1) if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr: - if self.plotScale != "physic" and self.genotype.filler == True: - if self.selectedChr != -1: - start_cm = self.genotype[self.selectedChr - 1][0].cM - Xc = startPosX + (qtlresult['Mb'] - start_cm)*plotXScale - else: - start_cm = self.genotype[previous_chr_as_int][0].cM - Xc = startPosX + ((qtlresult['Mb']-start_cm-startMb)*plotXScale)*(((qtlresult['Mb']-start_cm-startMb)*plotXScale)/((qtlresult['Mb']-start_cm-startMb+self.GraphInterval)*plotXScale)) + if self.plotScale != "physic" and self.mapping_method == "reaper" and not self.manhattan_plot: + Xc = startPosX + (qtlresult['cM']-startMb)*plotXScale + if hasattr(self.genotype, "filler"): + if self.genotype.filler: + if self.selectedChr != -1: + start_cm = self.genotype[self.selectedChr - 1][0].cM + Xc = startPosX + (qtlresult['Mb'] - start_cm)*plotXScale + else: + start_cm = self.genotype[previous_chr_as_int][0].cM + Xc = startPosX + ((qtlresult['Mb']-start_cm-startMb)*plotXScale)*(((qtlresult['Mb']-start_cm-startMb)*plotXScale)/((qtlresult['Mb']-start_cm-startMb+self.GraphInterval)*plotXScale)) else: Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index c1a56787..8c294460 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -11,7 +11,7 @@ from utility.tools import locate, TEMPDIR import utility.logger logger = utility.logger.getLogger(__name__ ) -def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): +def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_c = ro.r["c"] # Map the c function @@ -33,7 +33,13 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe # genofilelocation = locate(crossname + ".RData", "genotype/rdata") # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function #except: - generate_cross_from_geno(dataset) + + if mapping_scale == "morgan": + scale_units = "cM" + else: + scale_units = "Mb" + + generate_cross_from_geno(dataset, scale_units) GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function crossfilelocation = TMPDIR + crossname + ".cross" if dataset.group.genofile: @@ -47,6 +53,8 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe else: cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") + logger.debug("VAL LEN:", len(vals)) + pheno_string = sanitize_rqtl_phenotype(vals) cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype @@ -94,11 +102,9 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface - the_scale = check_mapping_scale(genofilelocation) - return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species), the_scale + return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) else: - the_scale = check_mapping_scale(genofilelocation) - return process_rqtl_results(result_data_frame, dataset.group.species), the_scale + return process_rqtl_results(result_data_frame, dataset.group.species) def generate_cross_from_rdata(dataset): rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") @@ -110,7 +116,7 @@ def generate_cross_from_rdata(dataset): } """ % (rdata_location)) -def generate_cross_from_geno(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -127,21 +133,23 @@ def generate_cross_from_geno(dataset): # TODO: Need to figure out why som type <- getGenoCode(header, 'type') if(type == '4-way'){ genocodes <- c('1','2','3','4') + genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#', crosstype="4way") } else { genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes + genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') } - genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype c('sex', '', '', sex), # Sex phenotype for the mice - cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes + cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)])) # Genotypes write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file require(qtl) if(type == '4-way'){ cat('Loading in as 4-WAY\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE) # Load the created cross file using R/qtl read.cross + cross = read.cross(file=out, 'csvr', genotypes=genocodes) + #cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE) # Load the created cross file using R/qtl read.cross }else{ cat('Loading in as normal\n') cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross @@ -152,7 +160,7 @@ def generate_cross_from_geno(dataset): # TODO: Need to figure out why som } return(cross) } - """ % (dataset.group.genofile)) + """ % (dataset.group.genofile, scale_units)) def add_perm_strata(cross, perm_strata): col_string = 'c("the_strata")' @@ -300,20 +308,4 @@ def process_rqtl_results(result, species_name): # TODO: how to make this marker['lod_score'] = output[i][2] qtl_results.append(marker) - return qtl_results - -def check_mapping_scale(genofile_location): - scale = "physic" - with open(genofile_location, "r") as geno_fh: - for line in geno_fh: - if line[0] == "@" or line[0] == "#": - - if "@scale" in line: - scale = line.split(":")[1].strip() - break - else: - continue - else: - break - - return scale \ No newline at end of file + return qtl_results \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 589be702..7449d8ce 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -156,6 +156,8 @@ class RunMapping(object): self.transform = "" self.score_type = "LRS" #ZS: LRS or LOD self.mapping_scale = "physic" + if "mapping_scale" in start_vars: + self.mapping_scale = start_vars['mapping_scale'] self.num_perm = 0 self.perm_output = [] self.bootstrap_results = [] @@ -255,9 +257,9 @@ class RunMapping(object): #if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results, self.mapping_scale = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: - results, self.mapping_scale = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: @@ -429,7 +431,7 @@ class RunMapping(object): with Bench("Trimming Markers for Table"): self.trimmed_markers = trim_markers_for_table(results) - chr_lengths = get_chr_lengths(self.mapping_scale, self.dataset, self.qtl_results) + chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) #ZS: For zooming into genome browser, need to pass chromosome name instead of number if self.dataset.group.species == "mouse": @@ -643,7 +645,7 @@ def geno_db_exists(this_dataset): except: return "False" -def get_chr_lengths(mapping_scale, dataset, qtl_results): +def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): chr_lengths = [] if mapping_scale == "physic": for i, the_chr in enumerate(dataset.species.chromosomes.chromosomes): @@ -666,8 +668,12 @@ def get_chr_lengths(mapping_scale, dataset, qtl_results): this_chr = chr_as_num highest_pos = 0 else: - if float(result['Mb']) > highest_pos: - highest_pos = float(result['Mb']) + if mapping_method == "reaper": + if float(result['cM']) > highest_pos: + highest_pos = float(result['cM']) + else: + if float(result['Mb']) > highest_pos: + highest_pos = float(result['Mb']) return chr_lengths diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 8883e627..40e344b8 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -22,6 +22,7 @@ from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList from utility import webqtlUtil, Plot, Bunch, helper_functions +from utility.tools import locate_ignore_error from base.trait import GeneralTrait from base import data_set from db import webqtlDatabaseFunction @@ -170,6 +171,17 @@ class ShowTrait(object): self.genofiles = self.dataset.group.get_genofiles() + if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: #ZS: No need to grab scales from .geno file unless it's using a mapping method that reads .geno files + if self.genofiles: + self.scales_in_geno = get_genotype_scales(self.genofiles) + else: + self.scales_in_geno = get_genotype_scales(self.dataset.group + ".geno") + + if len(self.scales_in_geno) < 2: + hddn['mapping_scale'] = self.scales_in_geno[self.scales_in_geno.keys()[0]][0] + else: + self.scales_in_geno = {} + self.has_num_cases = has_num_cases(self.this_trait) self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups, self.has_num_cases) @@ -239,6 +251,7 @@ class ShowTrait(object): #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2 hddn['do_control'] = False hddn['maf'] = 0.05 + hddn['mapping_scale'] = "physic" hddn['compare_traits'] = [] hddn['export_data'] = "" hddn['export_format'] = "excel" @@ -251,6 +264,7 @@ class ShowTrait(object): short_description = short_description, unit_type = trait_units, dataset_type = self.dataset.type, + scales_in_geno = self.scales_in_geno, data_scale = self.dataset.data_scale, sample_group_types = self.sample_group_types, sample_lists = sample_lists, @@ -597,4 +611,77 @@ def get_categorical_variables(this_trait, sample_list): if num_distinct < 10: categorical_var_list.append(sample_list.attributes[attribute].name) - return categorical_var_list \ No newline at end of file + return categorical_var_list + +def get_genotype_scales(genofiles): + geno_scales = {} + if type(genofiles) is list: + for the_file in genofiles: + file_location = the_file['location'] + geno_scales[file_location] = get_scales_from_genofile(file_location) + else: + geno_scales[genofiles] = get_scales_from_genofile(genofiles) + + return geno_scales + +def get_scales_from_genofile(file_location): + geno_path = locate_ignore_error(file_location, 'genotype') + + if not geno_path: #ZS: This is just to allow the code to run when + return [["physic", "Mb"]] + cm_and_mb_cols_exist = True + cm_column = None + mb_column = None + with open(geno_path, "r") as geno_fh: + for i, line in enumerate(geno_fh): + if line[0] == "#" or line[0] == "@": + if "@scale" in line: #ZS: If the scale is made explicit in the metadata, use that + scale = line.split(":")[1].strip() + if scale == "morgan": + return [["morgan", "cM"]] + else: + return [["physic", "Mb"]] + else: + continue + if line[:3] == "Chr": + first_marker_line = i + 1 + if line.split("\t")[2].strip() == "cM": + cm_column = 2 + elif line.split("\t")[3].strip() == "cM": + cm_column = 3 + if line.split("\t")[2].strip() == "Mb": + mb_column = 2 + elif line.split("\t")[3].strip() == "Mb": + mb_column = 3 + break + + #ZS: This attempts to check whether the cM and Mb columns are 'real', since some .geno files have one column be a copy of the other column, or have one column that is all 0s + cm_all_zero = True + mb_all_zero = True + cm_mb_all_equal = True + for i, line in enumerate(geno_fh): + if first_marker_line <= i < first_marker_line + 10: #ZS: I'm assuming there won't be more than 10 markers where the position is listed as 0 + if cm_column: + cm_val = line.split("\t")[cm_column].strip() + if cm_val != "0": + cm_all_zero = False + if mb_column: + mb_val = line.split("\t")[mb_column].strip() + if mb_val != "0": + mb_all_zero = False + if cm_column and mb_column: + if cm_val != mb_val: + cm_mb_all_equal = False + else: + if i > first_marker_line + 10: + break + + #ZS: This assumes that both won't be all zero, since if that's the case mapping shouldn't be an option to begin with + if mb_all_zero: + return [["morgan", "cM"]] + elif cm_mb_all_equal: + return [["physic", "Mb"]] + elif cm_and_mb_cols_exist: + return [["physic", "Mb"], ["morgan", "cM"]] + else: + return [["physic", "Mb"]] diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index 478ed87e..7176a0da 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -165,6 +165,7 @@ url = "/loading"; $('input[name=method]').val("rqtl_geno"); $('input[name=selected_chr]').val($('#chr_rqtl_geno').val()); + $('input[name=mapping_scale]').val($('#scale_rqtl_geno').val()); $('input[name=genofile]').val($('#genofile_rqtl_geno').val()); $('input[name=num_perm]').val($('input[name=num_perm_rqtl_geno]').val()); $('input[name=categorical_vars]').val(js_data.categorical_vars) @@ -210,6 +211,7 @@ url = "/loading"; $('input[name=method]').val("reaper"); $('input[name=selected_chr]').val($('#chr_reaper').val()); + $('input[name=mapping_scale]').val($('#scale_reaper').val()); $('input[name=genofile]').val($('#genofile_reaper').val()); $('input[name=num_perm]').val($('input[name=num_perm_reaper]').val()); $('input[name=control_marker]').val($('input[name=control_reaper]').val()); @@ -289,4 +291,21 @@ return toggle_enable_disable("#suggestive_lrs"); }); + $('#genofile_rqtl_geno').change(function() { + geno_location = $(this).children("option:selected").val().split(":")[0] + $('#scale_rqtl_geno').empty() + the_scales = js_data.scales_in_geno[geno_location] + for (var i = 0; i < the_scales.length; i++){ + $('#scale_rqtl_geno').append($("").attr("value", the_scales[i][0]).text(the_scales[i][1])); + } + }); + $('#genofile_reaper').change(function() { + geno_location = $(this).children("option:selected").val().split(":")[0] + $('#scale_reaper').empty() + the_scales = js_data.scales_in_geno[geno_location] + for (var i = 0; i < the_scales.length; i++){ + $('#scale_reaper').append($("").attr("value", the_scales[i][0]).text(the_scales[i][1])); + } + }); + }).call(this); diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html index 7e05be18..94ac0350 100644 --- a/wqflask/wqflask/templates/mapping_results.html +++ b/wqflask/wqflask/templates/mapping_results.html @@ -274,7 +274,11 @@ {% endif %}