From 0e240cbfc6034b65b42c2d21f59ae2663dd2f6ec Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 27 Mar 2020 16:32:08 -0500 Subject: Added integration for using RData files with R/qtl, plus some other changes to how it decides which mapping methods to display on the trait page --- wqflask/base/data_set.py | 11 ++- wqflask/utility/gen_geno_ob.py | 91 +++++++++++++++------- .../marker_regression/display_mapping_results.py | 86 ++++++++++---------- wqflask/wqflask/marker_regression/rqtl_mapping.py | 40 +++++++--- wqflask/wqflask/marker_regression/run_mapping.py | 3 +- wqflask/wqflask/templates/collections/view.html | 4 +- .../templates/show_trait_mapping_tools.html | 39 +++++----- 7 files changed, 168 insertions(+), 106 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 7fe9a8ac..abfdd277 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -307,9 +307,11 @@ class DatasetGroup(object): mapping_id = g.db.execute("select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0] if mapping_id == "1": - mapping_names = ["QTLReaper", "R/qtl"] + mapping_names = ["GEMMA", "QTLReaper", "R/qtl"] elif mapping_id == "2": - mapping_names = ["GEMMA"] + mapping_names = [] + elif mapping_id == "3": + mapping_names = ["R/qtl"] elif mapping_id == "4": mapping_names = ["GEMMA", "PLINK"] else: @@ -392,7 +394,10 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: - full_filename = str(locate(self.genofile, 'genotype')) + if "RData" in self.genofile: #ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to other file types like RData + full_filename = str(locate(self.genofile.split(".")[0] + ".geno", 'genotype')) + else: + full_filename = str(locate(self.genofile, 'genotype')) else: full_filename = str(locate(self.name + '.geno', 'genotype')) diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 44e2722f..aa5b27c4 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,5 +1,8 @@ from __future__ import absolute_import, division, print_function +import utility.logger +logger = utility.logger.getLogger(__name__ ) + class genotype(object): """ Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure @@ -34,8 +37,34 @@ class genotype(object): def __len__(self): return len(self.chromosomes) - def read_file(self, filename): + def read_rdata_output(self, qtl_results): + #ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results + self.chromosomes = [] #ZS: Overwriting since the .geno file's contents are just placeholders + + this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + chr_ob = None + for marker in qtl_results: + locus = Locus(self) + if str(marker['chr']) != this_chr: + if this_chr != "": + self.chromosomes.append(chr_ob) + this_chr = str(marker['chr']) + chr_ob = Chr(this_chr, self) + if 'chr' in marker: + locus.chr = str(marker['chr']) + if 'name' in marker: + locus.name = marker['name'] + if 'Mb' in marker: + locus.Mb = marker['Mb'] + if 'cM' in marker: + locus.cM = marker['cM'] + chr_ob.loci.append(locus) + + self.chromosomes.append(chr_ob) + + return self + def read_file(self, filename): with open(filename, 'r') as geno_file: lines = geno_file.readlines() @@ -109,33 +138,39 @@ class Chr(object): return len(self.loci) def add_marker(self, marker_row): - self.loci.append(Locus(marker_row, self.geno_ob)) + self.loci.append(Locus(self.geno_ob, marker_row)) class Locus(object): - def __init__(self, marker_row, geno_ob): - self.chr = marker_row[0] - self.name = marker_row[1] - try: - self.cM = float(marker_row[geno_ob.cm_column]) - except: - self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None - - geno_table = { - geno_ob.mat: -1, - geno_ob.pat: 1, - geno_ob.het: 0, - geno_ob.unk: "U" - } - + def __init__(self, geno_ob, marker_row = None): + self.chr = None + self.name = None + self.cM = None + self.Mb = None self.genotype = [] - if geno_ob.mb_exists: - start_pos = 4 - else: - start_pos = 3 - - for allele in marker_row[start_pos:]: - if allele in geno_table.keys(): - self.genotype.append(geno_table[allele]) - else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown - self.genotype.append("U") \ No newline at end of file + if marker_row: + self.chr = marker_row[0] + self.name = marker_row[1] + try: + self.cM = float(marker_row[geno_ob.cm_column]) + except: + self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 + self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + + geno_table = { + geno_ob.mat: -1, + geno_ob.pat: 1, + geno_ob.het: 0, + geno_ob.unk: "U" + } + + self.genotype = [] + if geno_ob.mb_exists: + start_pos = 4 + else: + start_pos = 3 + + for allele in marker_row[start_pos:]: + if allele in geno_table.keys(): + self.genotype.append(geno_table[allele]) + else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown + self.genotype.append("U") \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 66b5e25e..cf4508dd 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -247,11 +247,29 @@ class DisplayMappingResults(object): self.strainlist = start_vars['samples'] + self.traitList = [] + thisTrait = start_vars['this_trait'] + self.traitList.append(thisTrait) + + ################################################################ + # Calculations QTL goes here + ################################################################ + self.multipleInterval = len(self.traitList) > 1 + self.qtlresults = start_vars['qtl_results'] + + if self.multipleInterval: + self.colorCollection = Plot.colorSpectrum(len(self.qtlresults)) + else: + self.colorCollection = [self.LRS_COLOR] + if self.mapping_method == "reaper" and self.manhattan_plot != True: self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() + if self.mapping_method == "rqtl_geno" and self.genotype.filler == True: + self.genotype = self.genotype.read_rdata_output(self.qtlresults) + #Darwing Options try: if self.selectedChr > -1: @@ -346,10 +364,6 @@ class DisplayMappingResults(object): else: self.GraphInterval = self.cMGraphInterval #cM - self.traitList = [] - thisTrait = start_vars['this_trait'] - self.traitList.append(thisTrait) - ## BEGIN HaplotypeAnalyst ## count the amount of individuals to be plotted, and increase self.graphHeight if self.haplotypeAnalystChecked and self.selectedChr > -1: @@ -371,16 +385,7 @@ class DisplayMappingResults(object): self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS+10) * self.EACH_GENE_HEIGHT ## END HaplotypeAnalyst - ################################################################ - # Calculations QTL goes here - ################################################################ - self.multipleInterval = len(self.traitList) > 1 - self.qtlresults = start_vars['qtl_results'] - if self.multipleInterval: - self.colorCollection = Plot.colorSpectrum(len(self.qtlresults)) - else: - self.colorCollection = [self.LRS_COLOR] ######################### @@ -1713,12 +1718,12 @@ class DisplayMappingResults(object): LRSLODFont=pid.Font(ttf="verdana", size=18*zoom*1.5, bold=0) yZero = yTopOffset + plotHeight - #LRSHeightThresh = drawAreaHeight - #AdditiveHeightThresh = drawAreaHeight/2 - #DominanceHeightThresh = drawAreaHeight/2 - LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) - AdditiveHeightThresh = LRSHeightThresh/2 - DominanceHeightThresh = LRSHeightThresh/2 + LRSHeightThresh = drawAreaHeight + AdditiveHeightThresh = drawAreaHeight/2 + DominanceHeightThresh = drawAreaHeight/2 + # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) + # AdditiveHeightThresh = LRSHeightThresh/2 + # DominanceHeightThresh = LRSHeightThresh/2 if LRS_LOD_Max > 100: LRSScale = 20.0 @@ -1756,8 +1761,8 @@ class DisplayMappingResults(object): if LRS_LOD_Max == 0.0: LRS_LOD_Max = 0.000001 yTopOffset + 30*(zoom - 1) - #yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - yLRS = yZero - (item/LRSAxisList[-1]) * LRSHeightThresh + yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh + #yLRS = yZero - (item/LRSAxisList[-1]) * LRSHeightThresh canvas.drawLine(xLeftOffset, yLRS, xLeftOffset - 4, yLRS, color=self.LRS_COLOR, width=1*zoom) if all_int: scaleStr = "%d" % item @@ -1767,10 +1772,10 @@ class DisplayMappingResults(object): canvas.drawString(scaleStr, xLeftOffset-4-canvas.stringWidth(scaleStr, font=LRSScaleFont)-5, yLRS+3, font=LRSScaleFont, color=self.LRS_COLOR) if self.permChecked and self.nperm > 0 and not self.multipleInterval: - # significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max - # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max - significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] - suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] + significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max + suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max + # significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] + # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] startPosX = xLeftOffset #"Significant" and "Suggestive" Drawing Routine @@ -1887,36 +1892,35 @@ class DisplayMappingResults(object): # fix the over limit LRS graph issue since genotype trait may give infinite LRS; # for any lrs is over than 460(LRS max in this system), it will be reset to 460 - yLRS = yZero - (item/LRSAxisList[-1]) * (yZero - yTopOffset + 30*(zoom - 1)) - #yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh + yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh if 'lrs_value' in qtlresult: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-log(p)": if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) + Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) else: - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) + Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) else: if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max else: - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max else: if qtlresult['lod_score'] > 100 or qtlresult['lod_score']=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max else: if self.LRS_LOD == "LRS": - Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max else: - Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max if self.manhattan_plot == True: if self.selectedChr == -1 and (previous_chr_as_int % 2 == 1): diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 41d67012..2e3ea406 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -9,8 +9,6 @@ import utility.logger logger = utility.logger.getLogger(__name__ ) def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan): - geno_to_rqtl_function(dataset) - ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_c = ro.r["c"] # Map the c function @@ -21,16 +19,24 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, print(r_library("qtl")) # Load R/qtl ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + scanone = ro.r["scanone"] # Map the scanone function + scantwo = ro.r["scantwo"] # Map the scantwo function + calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function crossname = dataset.group.name - genofilelocation = locate(crossname + ".geno", "genotype") - crossfilelocation = TMPDIR + crossname + ".cross" - - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available + try: + generate_cross_from_rdata(dataset) + read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function + genofilelocation = locate(crossname + ".RData", "genotype/rdata") + cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function + except: + generate_cross_from_geno(dataset) + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + crossfilelocation = TMPDIR + crossname + ".cross" + genofilelocation = locate(crossname + ".geno", "genotype") + + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available if manhattan_plot: cross_object = calc_genoprob(cross_object) @@ -71,7 +77,18 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, else: return process_rqtl_results(result_data_frame) -def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_rdata(dataset): + rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") + ro.r(""" + generate_cross_from_rdata <- function(filename = '%s') { + load(file=filename) + cross = cunique + return(cross) + } + """ % (rdata_location)) + +def generate_cross_from_geno(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -170,6 +187,7 @@ def process_rqtl_results(result): # TODO: how to make this a one liner an marker = {} marker['name'] = result.rownames[i] marker['chr'] = output[i][0] + marker['cM'] = output[i][1] marker['Mb'] = output[i][1] marker['lod_score'] = output[i][2] qtl_results.append(marker) diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index ba41ffc3..f03b046e 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -81,7 +81,6 @@ class RunMapping(object): self.vals.append(value) else: self.samples = [] - for sample in self.dataset.group.samplelist: # sample is actually the name of an individual if (len(genofile_samplelist) == 0) or (sample in genofile_samplelist): in_trait_data = False @@ -186,7 +185,7 @@ class RunMapping(object): self.showGenes = "ON" self.viewLegend = "ON" - self.dataset.group.get_markers() + #self.dataset.group.get_markers() if self.mapping_method == "gemma": self.first_run = True self.output_files = None diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html index 97e46756..1be6539d 100644 --- a/wqflask/wqflask/templates/collections/view.html +++ b/wqflask/wqflask/templates/collections/view.html @@ -161,10 +161,10 @@ {% endblock %} {% block js %} + + - - diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index a806a8b3..ad7412b2 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -4,25 +4,18 @@