From 0e240cbfc6034b65b42c2d21f59ae2663dd2f6ec Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 27 Mar 2020 16:32:08 -0500 Subject: Added integration for using RData files with R/qtl, plus some other changes to how it decides which mapping methods to display on the trait page --- wqflask/base/data_set.py | 11 ++- wqflask/utility/gen_geno_ob.py | 91 +++++++++++++++------- .../marker_regression/display_mapping_results.py | 86 ++++++++++---------- wqflask/wqflask/marker_regression/rqtl_mapping.py | 40 +++++++--- wqflask/wqflask/marker_regression/run_mapping.py | 3 +- wqflask/wqflask/templates/collections/view.html | 4 +- .../templates/show_trait_mapping_tools.html | 39 +++++----- 7 files changed, 168 insertions(+), 106 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 7fe9a8ac..abfdd277 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -307,9 +307,11 @@ class DatasetGroup(object): mapping_id = g.db.execute("select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0] if mapping_id == "1": - mapping_names = ["QTLReaper", "R/qtl"] + mapping_names = ["GEMMA", "QTLReaper", "R/qtl"] elif mapping_id == "2": - mapping_names = ["GEMMA"] + mapping_names = [] + elif mapping_id == "3": + mapping_names = ["R/qtl"] elif mapping_id == "4": mapping_names = ["GEMMA", "PLINK"] else: @@ -392,7 +394,10 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: - full_filename = str(locate(self.genofile, 'genotype')) + if "RData" in self.genofile: #ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to other file types like RData + full_filename = str(locate(self.genofile.split(".")[0] + ".geno", 'genotype')) + else: + full_filename = str(locate(self.genofile, 'genotype')) else: full_filename = str(locate(self.name + '.geno', 'genotype')) diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 44e2722f..aa5b27c4 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,5 +1,8 @@ from __future__ import absolute_import, division, print_function +import utility.logger +logger = utility.logger.getLogger(__name__ ) + class genotype(object): """ Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure @@ -34,8 +37,34 @@ class genotype(object): def __len__(self): return len(self.chromosomes) - def read_file(self, filename): + def read_rdata_output(self, qtl_results): + #ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results + self.chromosomes = [] #ZS: Overwriting since the .geno file's contents are just placeholders + + this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + chr_ob = None + for marker in qtl_results: + locus = Locus(self) + if str(marker['chr']) != this_chr: + if this_chr != "": + self.chromosomes.append(chr_ob) + this_chr = str(marker['chr']) + chr_ob = Chr(this_chr, self) + if 'chr' in marker: + locus.chr = str(marker['chr']) + if 'name' in marker: + locus.name = marker['name'] + if 'Mb' in marker: + locus.Mb = marker['Mb'] + if 'cM' in marker: + locus.cM = marker['cM'] + chr_ob.loci.append(locus) + + self.chromosomes.append(chr_ob) + + return self + def read_file(self, filename): with open(filename, 'r') as geno_file: lines = geno_file.readlines() @@ -109,33 +138,39 @@ class Chr(object): return len(self.loci) def add_marker(self, marker_row): - self.loci.append(Locus(marker_row, self.geno_ob)) + self.loci.append(Locus(self.geno_ob, marker_row)) class Locus(object): - def __init__(self, marker_row, geno_ob): - self.chr = marker_row[0] - self.name = marker_row[1] - try: - self.cM = float(marker_row[geno_ob.cm_column]) - except: - self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None - - geno_table = { - geno_ob.mat: -1, - geno_ob.pat: 1, - geno_ob.het: 0, - geno_ob.unk: "U" - } - + def __init__(self, geno_ob, marker_row = None): + self.chr = None + self.name = None + self.cM = None + self.Mb = None self.genotype = [] - if geno_ob.mb_exists: - start_pos = 4 - else: - start_pos = 3 - - for allele in marker_row[start_pos:]: - if allele in geno_table.keys(): - self.genotype.append(geno_table[allele]) - else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown - self.genotype.append("U") \ No newline at end of file + if marker_row: + self.chr = marker_row[0] + self.name = marker_row[1] + try: + self.cM = float(marker_row[geno_ob.cm_column]) + except: + self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 + self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + + geno_table = { + geno_ob.mat: -1, + geno_ob.pat: 1, + geno_ob.het: 0, + geno_ob.unk: "U" + } + + self.genotype = [] + if geno_ob.mb_exists: + start_pos = 4 + else: + start_pos = 3 + + for allele in marker_row[start_pos:]: + if allele in geno_table.keys(): + self.genotype.append(geno_table[allele]) + else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown + self.genotype.append("U") \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 66b5e25e..cf4508dd 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -247,11 +247,29 @@ class DisplayMappingResults(object): self.strainlist = start_vars['samples'] + self.traitList = [] + thisTrait = start_vars['this_trait'] + self.traitList.append(thisTrait) + + ################################################################ + # Calculations QTL goes here + ################################################################ + self.multipleInterval = len(self.traitList) > 1 + self.qtlresults = start_vars['qtl_results'] + + if self.multipleInterval: + self.colorCollection = Plot.colorSpectrum(len(self.qtlresults)) + else: + self.colorCollection = [self.LRS_COLOR] + if self.mapping_method == "reaper" and self.manhattan_plot != True: self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() + if self.mapping_method == "rqtl_geno" and self.genotype.filler == True: + self.genotype = self.genotype.read_rdata_output(self.qtlresults) + #Darwing Options try: if self.selectedChr > -1: @@ -346,10 +364,6 @@ class DisplayMappingResults(object): else: self.GraphInterval = self.cMGraphInterval #cM - self.traitList = [] - thisTrait = start_vars['this_trait'] - self.traitList.append(thisTrait) - ## BEGIN HaplotypeAnalyst ## count the amount of individuals to be plotted, and increase self.graphHeight if self.haplotypeAnalystChecked and self.selectedChr > -1: @@ -371,16 +385,7 @@ class DisplayMappingResults(object): self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS+10) * self.EACH_GENE_HEIGHT ## END HaplotypeAnalyst - ################################################################ - # Calculations QTL goes here - ################################################################ - self.multipleInterval = len(self.traitList) > 1 - self.qtlresults = start_vars['qtl_results'] - if self.multipleInterval: - self.colorCollection = Plot.colorSpectrum(len(self.qtlresults)) - else: - self.colorCollection = [self.LRS_COLOR] ######################### @@ -1713,12 +1718,12 @@ class DisplayMappingResults(object): LRSLODFont=pid.Font(ttf="verdana", size=18*zoom*1.5, bold=0) yZero = yTopOffset + plotHeight - #LRSHeightThresh = drawAreaHeight - #AdditiveHeightThresh = drawAreaHeight/2 - #DominanceHeightThresh = drawAreaHeight/2 - LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) - AdditiveHeightThresh = LRSHeightThresh/2 - DominanceHeightThresh = LRSHeightThresh/2 + LRSHeightThresh = drawAreaHeight + AdditiveHeightThresh = drawAreaHeight/2 + DominanceHeightThresh = drawAreaHeight/2 + # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) + # AdditiveHeightThresh = LRSHeightThresh/2 + # DominanceHeightThresh = LRSHeightThresh/2 if LRS_LOD_Max > 100: LRSScale = 20.0 @@ -1756,8 +1761,8 @@ class DisplayMappingResults(object): if LRS_LOD_Max == 0.0: LRS_LOD_Max = 0.000001 yTopOffset + 30*(zoom - 1) - #yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - yLRS = yZero - (item/LRSAxisList[-1]) * LRSHeightThresh + yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh + #yLRS = yZero - (item/LRSAxisList[-1]) * LRSHeightThresh canvas.drawLine(xLeftOffset, yLRS, xLeftOffset - 4, yLRS, color=self.LRS_COLOR, width=1*zoom) if all_int: scaleStr = "%d" % item @@ -1767,10 +1772,10 @@ class DisplayMappingResults(object): canvas.drawString(scaleStr, xLeftOffset-4-canvas.stringWidth(scaleStr, font=LRSScaleFont)-5, yLRS+3, font=LRSScaleFont, color=self.LRS_COLOR) if self.permChecked and self.nperm > 0 and not self.multipleInterval: - # significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max - # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max - significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] - suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] + significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max + suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max + # significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] + # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] startPosX = xLeftOffset #"Significant" and "Suggestive" Drawing Routine @@ -1887,36 +1892,35 @@ class DisplayMappingResults(object): # fix the over limit LRS graph issue since genotype trait may give infinite LRS; # for any lrs is over than 460(LRS max in this system), it will be reset to 460 - yLRS = yZero - (item/LRSAxisList[-1]) * (yZero - yTopOffset + 30*(zoom - 1)) - #yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh + yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh if 'lrs_value' in qtlresult: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-log(p)": if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) + Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) else: - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) + Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) else: if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max else: - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max else: if qtlresult['lod_score'] > 100 or qtlresult['lod_score']=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max else: if self.LRS_LOD == "LRS": - Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max else: - Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] - #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max + #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] + Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max if self.manhattan_plot == True: if self.selectedChr == -1 and (previous_chr_as_int % 2 == 1): diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 41d67012..2e3ea406 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -9,8 +9,6 @@ import utility.logger logger = utility.logger.getLogger(__name__ ) def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan): - geno_to_rqtl_function(dataset) - ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_c = ro.r["c"] # Map the c function @@ -21,16 +19,24 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, print(r_library("qtl")) # Load R/qtl ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + scanone = ro.r["scanone"] # Map the scanone function + scantwo = ro.r["scantwo"] # Map the scantwo function + calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function crossname = dataset.group.name - genofilelocation = locate(crossname + ".geno", "genotype") - crossfilelocation = TMPDIR + crossname + ".cross" - - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available + try: + generate_cross_from_rdata(dataset) + read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function + genofilelocation = locate(crossname + ".RData", "genotype/rdata") + cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function + except: + generate_cross_from_geno(dataset) + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + crossfilelocation = TMPDIR + crossname + ".cross" + genofilelocation = locate(crossname + ".geno", "genotype") + + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available if manhattan_plot: cross_object = calc_genoprob(cross_object) @@ -71,7 +77,18 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, else: return process_rqtl_results(result_data_frame) -def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_rdata(dataset): + rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") + ro.r(""" + generate_cross_from_rdata <- function(filename = '%s') { + load(file=filename) + cross = cunique + return(cross) + } + """ % (rdata_location)) + +def generate_cross_from_geno(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -170,6 +187,7 @@ def process_rqtl_results(result): # TODO: how to make this a one liner an marker = {} marker['name'] = result.rownames[i] marker['chr'] = output[i][0] + marker['cM'] = output[i][1] marker['Mb'] = output[i][1] marker['lod_score'] = output[i][2] qtl_results.append(marker) diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index ba41ffc3..f03b046e 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -81,7 +81,6 @@ class RunMapping(object): self.vals.append(value) else: self.samples = [] - for sample in self.dataset.group.samplelist: # sample is actually the name of an individual if (len(genofile_samplelist) == 0) or (sample in genofile_samplelist): in_trait_data = False @@ -186,7 +185,7 @@ class RunMapping(object): self.showGenes = "ON" self.viewLegend = "ON" - self.dataset.group.get_markers() + #self.dataset.group.get_markers() if self.mapping_method == "gemma": self.first_run = True self.output_files = None diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html index 97e46756..1be6539d 100644 --- a/wqflask/wqflask/templates/collections/view.html +++ b/wqflask/wqflask/templates/collections/view.html @@ -161,10 +161,10 @@ {% endblock %} {% block js %} + + - - diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index a806a8b3..ad7412b2 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -4,25 +4,18 @@
-
+ {% for mapping_method in dataset.group.mapping_names %} + {% if mapping_method == "GEMMA" %} +
@@ -102,7 +97,7 @@
- {% if dataset.group.mapping_id == "1" %} + {% elif mapping_method == "QTLReaper" %}
@@ -217,7 +212,8 @@
-
+ {% elif mapping_method == "R/qtl" %} +
@@ -341,19 +337,24 @@
{% endif %} + {% endfor %}
+ {% for mapping_method in dataset.group.mapping_names %} + {% if mapping_method == "GEMMA" %}
GEMMA
Maps traits with correction for kinship among samples using a linear mixed model method, and also allows users to fit multiple covariates such as sex, age, treatment, and genetic markers (PMID: 2453419, and GitHub code). GEMMA incorporates the Leave One Chromosome Out (LOCO) method to ensure that the correction for kinship does not remove useful genetic variance near each marker. Markers can be filtered to include only those with minor allele frequencies (MAF) above a threshold. The default MAF is 0.05.
- {% if dataset.group.mapping_id == "1" %} + {% elif mapping_method == "R/qtl" %}
R/qtl
Major upgrade of R/qtl that supports most experimental populations including those with complex admixture and two or more parental lines as well as large omic data sets (PMID: 30591514). Both R/qtl and R/qtl2 are available as stand-alone R packages (R suite).
+ {% elif mapping_method == "QTLReaper" %}
Haley-Knott Regression
Fast linear mapping method (PMID 16718932) works well with F2 intercrosses and backcrosses, but that is not recommended for complex or admixed populations (e.g., GWAS or heterogeneous stock studies) or for advanced intercrosses, recombinant inbred families, or diallel crosses. Interactive plots in GeneNetwork have relied on the fast HK mapping for two decades and we still use this method for mapping omics data sets and computing genome-wide permutation threshold (QTL Reaper code).
{% endif %} + {% endfor %}