diff options
author | Arun Isaac | 2023-12-29 18:55:37 +0000 |
---|---|---|
committer | Arun Isaac | 2023-12-29 19:01:46 +0000 |
commit | 204a308be0f741726b9a620d88fbc22b22124c81 (patch) | |
tree | b3cf66906674020b530c844c2bb4982c8a0e2d39 /gn2/wqflask/marker_regression | |
parent | 83062c75442160427b50420161bfcae2c5c34c84 (diff) | |
download | genenetwork2-204a308be0f741726b9a620d88fbc22b22124c81.tar.gz |
Namespace all modules under gn2.
We move all modules under a gn2 directory. This is important for
"correct" packaging and deployment as a Guix service.
Diffstat (limited to 'gn2/wqflask/marker_regression')
-rw-r--r-- | gn2/wqflask/marker_regression/__init__.py | 0 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/display_mapping_results.py | 3336 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/exceptions.py | 13 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/gemma_mapping.py | 245 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/plink_mapping.py | 167 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/qtlreaper_mapping.py | 193 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/rqtl_mapping.py | 167 | ||||
-rw-r--r-- | gn2/wqflask/marker_regression/run_mapping.py | 743 |
8 files changed, 4864 insertions, 0 deletions
diff --git a/gn2/wqflask/marker_regression/__init__.py b/gn2/wqflask/marker_regression/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/gn2/wqflask/marker_regression/__init__.py diff --git a/gn2/wqflask/marker_regression/display_mapping_results.py b/gn2/wqflask/marker_regression/display_mapping_results.py new file mode 100644 index 00000000..b3bf3bc3 --- /dev/null +++ b/gn2/wqflask/marker_regression/display_mapping_results.py @@ -0,0 +1,3336 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2010/08/10 +# +# Last updated by Zach 12/14/2010 + +import datetime +import string +from math import * +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont +from PIL import ImageColor +import os +import json + +import htmlgen as HT + +from gn2.base import webqtlConfig +from gn2.base.GeneralObject import GeneralObject +from gn2.utility import webqtlUtil +from gn2.utility import Plot +from gn2.utility.tools import get_setting +from gn2.wqflask.interval_analyst import GeneUtil +from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR +from gn2.utility.pillow_utils import draw_rotated_text, draw_open_polygon +from gn2.wqflask.database import database_connection + +try: # Only import this for Python3 + from functools import reduce +except: + pass + +RED = ImageColor.getrgb("red") +BLUE = ImageColor.getrgb("blue") +GRAY = ImageColor.getrgb("gray") +GOLD = ImageColor.getrgb("gold") +BLACK = ImageColor.getrgb("black") +GREEN = ImageColor.getrgb("green") +PURPLE = ImageColor.getrgb("purple") +ORANGE = ImageColor.getrgb("orange") +YELLOW = ImageColor.getrgb("yellow") +DARKRED = ImageColor.getrgb("darkred") +DARKBLUE = ImageColor.getrgb("darkblue") +DARKGRAY = ImageColor.getrgb("darkgray") +DEEPPINK = ImageColor.getrgb("deeppink") +DARKGREEN = ImageColor.getrgb("darkgreen") +GAINSBORO = ImageColor.getrgb("gainsboro") +LIGHTBLUE = ImageColor.getrgb("lightblue") +DARKORANGE = ImageColor.getrgb("darkorange") +DARKVIOLET = ImageColor.getrgb("darkviolet") +MEDIUMPURPLE = ImageColor.getrgb("mediumpurple") +# ---- END: Define common colours ---- # + +# ZS: List of distinct colors for manhattan plot if user selects "varied" +COLOR_CODES = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", + "#000000", "#800000", "#008000", "#000080", "#808000", "#800080", + "#008080", "#808080", "#C00000", "#00C000", "#0000C0", "#C0C000", + "#C000C0", "#00C0C0", "#C0C0C0", "#400000", "#004000", "#000040"] + +DISTINCT_COLOR_LIST = [ImageColor.getrgb(color) for color in COLOR_CODES] + +# ---- FONT FILES ---- # +VERDANA_FILE = "./gn2/wqflask/static/fonts/verdana.ttf" +VERDANA_BOLD_FILE = "./gn2/wqflask/static/fonts/verdanab.ttf" +TREBUC_FILE = "./gn2/wqflask/static/fonts/trebucbd.ttf" +FNT_BS_FILE = "./gn2/wqflask/static/fonts/fnt_bs.ttf" +ARIAL_FILE = "./gn2/wqflask/static/fonts/arial.ttf" + +assert(os.path.isfile(VERDANA_FILE)) + +class HtmlGenWrapper: + """Wrapper Methods for HTML gen""" + @staticmethod + def create_image_tag(**kwargs): + image = HT.Image("", "") + for key, value in list(kwargs.items()): + image.set_attribute(key, value) + return image + + @staticmethod + def create_form_tag(**kwargs): + form = HT.Form("POST", "") # Default method is POST + + for key, value in list(kwargs.items()): + if key == "submit": + form.append(value) + continue + form.set_attribute(key.replace("cgi", "action"), str(value)) + return form + + @staticmethod + def create_p_tag(**kwargs): + paragraph = HT.Paragraph() + for key, value in list(kwargs.items()): + paragraph.set_attribute(key, value) + return paragraph + + @staticmethod + def create_br_tag(): + return HT.VoidElement("br") + + @staticmethod + def create_input_tag(**kwargs): + input_ = HT.Input() + for key, value in list(kwargs.items()): + input_.set_attribute(key.lower().replace("_", ""), value) + return input_ + + @staticmethod + def create_area_tag(**kwargs): + area = HT.VoidElement("area") + for key, value in list(kwargs.items()): + area.set_attribute(key, value) + return area + + @staticmethod + def create_link_tag(href, content, **kwargs): + link = HT.Link(href, content) + for key, value in list(kwargs.items()): + link.set_attribute(key, value) + return link + + @staticmethod + def create_map_tag(**kwargs): + map_ = HT.Element("map") + for key, value in list(kwargs.items()): + map_.set_attribute(key, value) + return map_ + + +class DisplayMappingResults: + """Inteval Mapping Plot Page""" + cMGraphInterval = 5 + GRAPH_MIN_WIDTH = 900 + GRAPH_MAX_WIDTH = 10000 # Don't set this too high + GRAPH_DEFAULT_WIDTH = 1280 + MULT_GRAPH_DEFAULT_WIDTH = 2000 + MULT_GRAPH_MIN_WIDTH = 1400 + MULT_GRAPH_DEFAULT_WIDTH = 1600 + GRAPH_DEFAULT_HEIGHT = 600 + + # Display order: + # UCSC BAND ========= + # ENSEMBL BAND -=-=-= + # ** GENES ********** + BAND_SPACING = 4 + + BAND_HEIGHT = 10 + BAND_HEIGHT = 10 + BAND_HEIGHT = 10 + + NUM_GENE_ROWS = 10 + EACH_GENE_HEIGHT = 6 # number of pixels tall, for each gene to display + EACH_GENE_ARROW_WIDTH = 5 + EACH_GENE_ARROW_SPACING = 14 + DRAW_DETAIL_MB = 4 + DRAW_UTR_LABELS_MB = 4 + + qmarkImg = HtmlGenWrapper.create_image_tag( + src='/images/qmarkBoxBlue.gif', + width="10", height="13", border="0", alt='Glossary' + ) + + # Note that "qmark.gif" is a similar, smaller, rounded-edges + # question mark. It doesn't look like the ones on the image, + # though, which is why we don't use it here. + + HELP_WINDOW_NAME = 'helpWind' + + # BEGIN HaplotypeAnalyst + NR_INDIVIDUALS = 0 + # END HaplotypeAnalyst + + ALEX_DEBUG_BOOL_PRINT_GENE_LIST = 1 + + kONE_MILLION = 1000000 + + LODFACTOR = 4.61 + + SNP_COLOR = ORANGE # Color for the SNP "seismograph" + TRANSCRIPT_LOCATION_COLOR = MEDIUMPURPLE + + BOOTSTRAP_BOX_COLOR = YELLOW + LRS_COLOR = ImageColor.getrgb("#0000FF") + SIGNIFICANT_COLOR = ImageColor.getrgb("#EBC7C7") + SUGGESTIVE_COLOR = GAINSBORO + SIGNIFICANT_WIDTH = 5 + SUGGESTIVE_WIDTH = 5 + ADDITIVE_COLOR_POSITIVE = GREEN + ADDITIVE_COLOR_NEGATIVE = ORANGE + DOMINANCE_COLOR_POSITIVE = DARKVIOLET + DOMINANCE_COLOR_NEGATIVE = RED + + # BEGIN HaplotypeAnalyst + HAPLOTYPE_POSITIVE = GREEN + HAPLOTYPE_NEGATIVE = RED + HAPLOTYPE_HETEROZYGOUS = BLUE + HAPLOTYPE_RECOMBINATION = DARKGRAY + # END HaplotypeAnalyst + + TOP_RIGHT_INFO_COLOR = BLACK + + CLICKABLE_WEBQTL_REGION_COLOR = ImageColor.getrgb("#F5D3D3") + CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR = ImageColor.getrgb("#FCE9E9") + CLICKABLE_WEBQTL_TEXT_COLOR = ImageColor.getrgb("#912828") + + CLICKABLE_PHENOGEN_REGION_COLOR = ImageColor.getrgb("#A2FB94") + CLICKABLE_PHENOGEN_REGION_OUTLINE_COLOR = ImageColor.getrgb("#CEFEC7") + CLICKABLE_PHENOGEN_TEXT_COLOR = ImageColor.getrgb("#1FD504") + + CLICKABLE_UCSC_REGION_COLOR = ImageColor.getrgb("#DDDDEE") + CLICKABLE_UCSC_REGION_OUTLINE_COLOR = ImageColor.getrgb("#EDEDFF") + CLICKABLE_UCSC_TEXT_COLOR = ImageColor.getrgb("#333366") + + CLICKABLE_ENSEMBL_REGION_COLOR = ImageColor.getrgb("#EEEEDD") + CLICKABLE_ENSEMBL_REGION_OUTLINE_COLOR = ImageColor.getrgb("#FEFEEE") + CLICKABLE_ENSEMBL_TEXT_COLOR = ImageColor.getrgb("#555500") + + GRAPH_BACK_LIGHT_COLOR = ImageColor.getrgb("#FBFBFF") + GRAPH_BACK_DARK_COLOR = ImageColor.getrgb("#F1F1F9") + + HELP_PAGE_REF = '/glossary.html' + + def __init__(self, start_vars): + self.temp_uuid = start_vars['temp_uuid'] + self.hash_of_inputs = start_vars['hash_of_inputs'] + self.dataid = start_vars['dataid'] + + self.dataset = start_vars['dataset'] + self.this_trait = start_vars['this_trait'] + self.n_samples = start_vars['n_samples'] + self.species = start_vars['species'] + self.genofile_string = "" + if 'genofile_string' in start_vars: + self.genofile_string = start_vars['genofile_string'] + + self.geno_db_exists = start_vars['geno_db_exists'] + + self.first_run = True + if 'first_run' in start_vars: + self.first_run = start_vars['first_run'] + + if 'temp_trait' in start_vars and start_vars['temp_trait'] != "False": + self.temp_trait = "True" + self.group = start_vars['group'] + + # Needing for form submission when doing single chr + # mapping or remapping after changing options + self.sample_vals = start_vars['sample_vals'] + self.vals_hash= start_vars['vals_hash'] + self.sample_vals_dict = json.loads(self.sample_vals) + + self.transform = start_vars['transform'] + self.mapping_method = start_vars['mapping_method'] + self.mapping_results_path = start_vars['mapping_results_path'] + if self.mapping_method == "rqtl_geno": + self.mapmethod_rqtl_geno = start_vars['method'] + self.mapmodel_rqtl_geno = start_vars['model'] + self.pair_scan = start_vars['pair_scan'] + + self.js_data = start_vars['js_data'] + # Top markers to display in table + self.trimmed_markers = start_vars['trimmed_markers'] + + if self.dataset.group.species == "rat": + self._ucscDb = "rn6" + elif self.dataset.group.species == "mouse": + self._ucscDb = "mm10" + else: + self._ucscDb = "" + + ##################################### + # Options + ##################################### + # Mapping options + if start_vars['mapping_scale'] != "": + self.plotScale = start_vars['mapping_scale'] + else: + self.plotScale = "physic" + + self.manhattan_plot = start_vars['manhattan_plot'] + if self.manhattan_plot: + self.color_scheme = "alternating" + if 'color_scheme' in start_vars: + self.color_scheme = start_vars['color_scheme'] + if self.color_scheme == "single": + self.manhattan_single_color = ImageColor.getrgb( + "#" + start_vars['manhattan_single_color']) + + if 'permCheck' in list(start_vars.keys()): + self.permChecked = start_vars['permCheck'] + else: + self.permChecked = False + if start_vars['num_perm'] > 0: + self.nperm = int(start_vars['num_perm']) + if self.permChecked: + self.perm_output = start_vars['perm_output'] + self.suggestive = start_vars['suggestive'] + self.significant = start_vars['significant'] + else: + self.nperm = 0 + + if 'bootCheck' in list(start_vars.keys()): + self.bootChecked = start_vars['bootCheck'] + else: + self.bootChecked = False + if 'num_bootstrap' in list(start_vars.keys()): + self.nboot = int(start_vars['num_bootstrap']) + else: + self.nboot = 0 + if 'bootstrap_results' in list(start_vars.keys()): + self.bootResult = start_vars['bootstrap_results'] + else: + self.bootResult = [] + + if 'do_control' in list(start_vars.keys()): + self.doControl = start_vars['do_control'] + else: + self.doControl = "false" + if 'control_marker' in list(start_vars.keys()): + self.controlLocus = start_vars['control_marker'] + else: + self.controlLocus = "" + if 'covariates' in list(start_vars.keys()): + self.covariates = start_vars['covariates'] + if 'maf' in list(start_vars.keys()): + self.maf = start_vars['maf'] + else: + self.maf = "" + if 'output_files' in list(start_vars.keys()): + self.output_files = start_vars['output_files'] + if 'use_loco' in list(start_vars.keys()) and self.mapping_method == "gemma": + self.use_loco = start_vars['use_loco'] + + if self.mapping_method == "reaper": + if 'output_files' in start_vars: + self.output_files = ",".join( + [(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) + + self.categorical_vars = "" + self.perm_strata = "" + if 'perm_strata' in list(start_vars.keys()) and 'categorical_vars' in list(start_vars.keys()): + self.categorical_vars = start_vars['categorical_vars'] + self.perm_strata = start_vars['perm_strata'] + + self.selectedChr = int(start_vars['selected_chr']) + + self.strainlist = start_vars['samples'] + + self.traitList = [] + thisTrait = start_vars['this_trait'] + self.traitList.append(thisTrait) + + ################################################################ + # Calculations QTL goes here + ################################################################ + self.multipleInterval = len(self.traitList) > 1 + self.qtlresults = start_vars['qtl_results'] + + if self.multipleInterval: + self.colorCollection = Plot.colorSpectrum(len(self.qtlresults)) + else: + self.colorCollection = [self.LRS_COLOR] + + self.dataset.group.genofile = self.genofile_string.split(":")[0] + if self.mapping_method == "reaper" and self.manhattan_plot != True: + self.genotype = self.dataset.group.read_genotype_file( + use_reaper=True) + else: + self.genotype = self.dataset.group.read_genotype_file() + + # Darwing Options + try: + if self.selectedChr > -1: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + else: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + except: + if self.selectedChr > -1: + self.graphWidth = self.GRAPH_DEFAULT_WIDTH + else: + self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH + +# BEGIN HaplotypeAnalyst + if 'haplotypeAnalystCheck' in list(start_vars.keys()): + self.haplotypeAnalystChecked = start_vars['haplotypeAnalystCheck'] + else: + self.haplotypeAnalystChecked = False +# END HaplotypeAnalyst + + self.graphHeight = self.GRAPH_DEFAULT_HEIGHT + self.dominanceChecked = False + if 'LRSCheck' in list(start_vars.keys()): + self.LRS_LOD = start_vars['LRSCheck'] + else: + self.LRS_LOD = start_vars['score_type'] + self.intervalAnalystChecked = True + self.draw2X = False + if 'additiveCheck' in list(start_vars.keys()): + self.additiveChecked = start_vars['additiveCheck'] + else: + self.additiveChecked = False + if 'viewLegend' in list(start_vars.keys()): + self.legendChecked = start_vars['viewLegend'] + else: + self.legendChecked = False + if 'showSNP' in list(start_vars.keys()): + self.SNPChecked = start_vars['showSNP'] + else: + self.SNPChecked = False + if 'showHomology' in list(start_vars.keys()): + self.homologyChecked = start_vars['showHomology'] + else: + self.homologyChecked = "ON" + if 'showGenes' in list(start_vars.keys()): + self.geneChecked = start_vars['showGenes'] + else: + self.geneChecked = False + try: + self.startMb = float(start_vars['startMb']) + except: + self.startMb = -1 + try: + self.endMb = float(start_vars['endMb']) + except: + self.endMb = -1 + try: + self.lrsMax = float(start_vars['lrsMax']) + except: + self.lrsMax = 0 + + # Trait Infos + self.identification = "" + + ################################################################ + # Generate Chr list and Retrieve Length Information + ################################################################ + self.ChrList = [("All", -1)] + for i, indChr in enumerate(self.genotype): + if self.dataset.group.species == "mouse" and indChr.name == "20": + self.ChrList.append(("X", i)) + elif self.dataset.group.species == "rat" and indChr.name == "21": + self.ChrList.append(("X", i)) + self.ChrList.append((indChr.name, i)) + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute("SELECT Length FROM Chr_Length, InbredSet " + "WHERE Chr_Length.SpeciesId = InbredSet.SpeciesId " + "AND InbredSet.Name = %s AND Chr_Length.Name IN " + f"({', '.join(['%s' for x in self.ChrList[1:]])}) " + "ORDER BY Chr_Length.OrderId", + (self.dataset.group.name, + *[x[0] for x in self.ChrList[1:]],)) + self.ChrLengthMbList = cursor.fetchall() + + self.ChrLengthMbList = [x[0] / 1000000.0 for x in self.ChrLengthMbList] + self.ChrLengthMbSum = reduce( + lambda x, y: x + y, self.ChrLengthMbList, 0.0) + if self.ChrLengthMbList: + self.MbGraphInterval = self.ChrLengthMbSum / \ + (len(self.ChrLengthMbList) * 12) # Empirical Mb interval + else: + self.MbGraphInterval = 1 + + self.ChrLengthCMList = [] + for i, _chr in enumerate(self.genotype): + self.ChrLengthCMList.append(_chr[-1].cM - _chr[0].cM) + + self.ChrLengthCMSum = reduce( + lambda x, y: x + y, self.ChrLengthCMList, 0.0) + + if self.plotScale == 'physic': + self.GraphInterval = self.MbGraphInterval # Mb + else: + self.GraphInterval = self.cMGraphInterval # cM + + ######################### + # Get the sorting column + ######################### + RISet = self.dataset.group.name + if RISet in ('AXB', 'BXA', 'AXBXA'): + self.diffCol = ['B6J', 'A/J'] + elif RISet in ('BXD', 'BXD300', 'B6D2F2', 'BDF2-2005', 'BDF2-1999', 'BHHBF2', 'BXD-Harvested', 'BXD-Longevity', 'BXD-Micturition', 'BXD-AE', 'BXD-NIA-AD', 'B6D2RI', 'BXD-Bone', 'DOD-BXD-GWI', 'BXD-Heart-Metals', 'UTHSC-Cannabinoid'): + self.diffCol = ['B6J', 'D2J'] + elif RISet in ('CXB'): + self.diffCol = ['CBY', 'B6J'] + elif RISet in ('BXH', 'BHF2'): + self.diffCol = ['B6J', 'C3H'] + elif RISet in ('B6BTBRF2'): + self.diffCol = ['B6J', 'BTB'] + elif RISet in ('LXS'): + self.diffCol = ['ILS', 'ISS'] + else: + self.diffCol = [] + + for i, strain in enumerate(self.diffCol): + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute("SELECT Id FROM Strain WHERE Symbol = %s", + (strain,)) + if result := cursor.fetchone(): + self.diffCol[i] = result[0] + + ################################################################ + # GeneCollection goes here + ################################################################ + if self.plotScale == 'physic' and self.selectedChr != -1: + #StartMb or EndMb + if self.startMb < 0 or self.endMb < 0: + self.startMb = 0 + self.endMb = self.ChrLengthMbList[self.selectedChr - 1] + + geneTable = "" + + self.geneCol = None + self.homology = None + if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked or self.homologyChecked): + # Draw the genes for this chromosome / region of this chromosome + webqtldatabase = self.dataset.name + + if self.dataset.group.species == "mouse": + if self.selectedChr == 20: + chrName = "X" + else: + chrName = self.selectedChr + self.geneCol = GeneUtil.loadGenes( + str(chrName), self.diffCol, self.startMb, self.endMb, "mouse") + self.homology = GeneUtil.load_homology(str(chrName), self.startMb, self.endMb, "mouse_to_human.csv") + + elif self.dataset.group.species == "rat": + if self.selectedChr == 21: + chrName = "X" + else: + chrName = self.selectedChr + self.geneCol = GeneUtil.loadGenes( + str(chrName), self.diffCol, self.startMb, self.endMb, "rat") + + if self.geneCol and self.intervalAnalystChecked: + ####################################################################### + #Nick use GENEID as RefGene to get Literature Correlation Informations# + #For Interval Mapping, Literature Correlation isn't useful, so skip it# + #through set GENEID is None # + ####################################################################### + + GENEID = None + + self.geneTable(self.geneCol, GENEID) + +# BEGIN HaplotypeAnalyst +# count the amount of individuals to be plotted, and increase self.graphHeight + if self.haplotypeAnalystChecked and self.selectedChr > -1: + thisTrait = self.this_trait + smd = [] + for sample in self.sample_vals_dict.keys(): + if self.sample_vals_dict[sample] != "x": + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) + smd.append(temp) + else: + continue + samplelist = list(self.genotype.prgy) + for j, _geno in enumerate(self.genotype[0][1].genotype): + for item in smd: + if item.name == samplelist[j]: + self.NR_INDIVIDUALS = self.NR_INDIVIDUALS + 1 +# default: + self.graphHeight = self.graphHeight + 2 * \ + (self.NR_INDIVIDUALS + 10) * self.EACH_GENE_HEIGHT +# END HaplotypeAnalyst + + if self.homologyChecked and self.homology and self.geneChecked and self.geneCol: + self.graphHeight = self.graphHeight + (self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT + if self.geneChecked and self.geneCol: + self.graphHeight = self.graphHeight + (self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT + + + ################################################################ + # Plots goes here + ################################################################ + showLocusForm = "" + intCanvas = Image.new("RGBA", size=(self.graphWidth, self.graphHeight)) + gifmap = self.plotIntMapping( + intCanvas, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm) + + self.gifmap = gifmap.__str__() + + self.filename = webqtlUtil.genRandStr("Itvl_") + intCanvas.save( + "{}.png".format( + os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename)), + format='png') + intImg = HtmlGenWrapper.create_image_tag( + src="/image/{}.png".format(self.filename), + border="0", usemap='#WebQTLImageMap' + ) + + # Scales plot differently for high resolution + if self.draw2X: + intCanvasX2 = Image.new("RGBA", size=( + self.graphWidth * 2, self.graphHeight * 2)) + gifmapX2 = self.plotIntMapping( + intCanvasX2, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm, zoom=2) + intCanvasX2.save( + "{}.png".format( + os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, + self.filename + "X2")), + format='png') + + ################################################################ + # Outputs goes here + ################################################################ + # this form is used for opening Locus page or trait page, only available for genetic mapping + if showLocusForm: + showLocusForm = HtmlGenWrapper.create_form_tag( + cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), + enctype='multipart/form-data', + name=showLocusForm, + submit=HtmlGenWrapper.create_input_tag(type_='hidden')) + + hddn = {'FormID': 'showDatabase', 'ProbeSetID': '_', 'database': fd.RISet + \ + "Geno", 'CellID': '_', 'RISet': fd.RISet, 'incparentsf1': 'ON'} + for key in hddn.keys(): + showLocusForm.append(HtmlGenWrapper.create_input_tag( + name=key, value=hddn[key], type_='hidden')) + showLocusForm.append(intImg) + else: + showLocusForm = intImg + + if (self.permChecked and self.nperm > 0) and not (self.multipleInterval and 0 < self.nperm): + self.perm_filename = self.drawPermutationHistogram() + + ################################################################ + # footnote goes here + ################################################################ + # Small('More information about this graph is available here.') + btminfo = HtmlGenWrapper.create_p_tag(id="smallsize") + + if self.traitList and self.traitList[0].dataset and self.traitList[0].dataset.type == 'Geno': + btminfo.append(HtmlGenWrapper.create_br_tag()) + btminfo.append( + 'Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') + + def plotIntMapping(self, canvas, offset=(80, 120, 110, 100), zoom=1, startMb=None, endMb=None, showLocusForm=""): + im_drawer = ImageDraw.Draw(canvas) + # calculating margins + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + if self.multipleInterval: + yTopOffset = max(90, yTopOffset) + else: + if self.legendChecked: + yTopOffset += 10 + if self.covariates != "" and self.controlLocus and self.doControl != "false": + yTopOffset += 25 + if len(self.transform) > 0: + yTopOffset += 5 + else: + pass + + if self.plotScale != 'physic': + yBottomOffset = max(120, yBottomOffset) + fontZoom = zoom + if zoom == 2: + xLeftOffset += 20 + fontZoom = 1.5 + + xLeftOffset = int(xLeftOffset * fontZoom) + xRightOffset = int(xRightOffset * fontZoom) + yBottomOffset = int(yBottomOffset * fontZoom) + + cWidth = canvas.size[0] + cHeight = canvas.size[1] + plotWidth = cWidth - xLeftOffset - xRightOffset + plotHeight = cHeight - yTopOffset - yBottomOffset + + # Drawing Area Height + drawAreaHeight = plotHeight + if self.plotScale == 'physic' and self.selectedChr > -1: + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + drawAreaHeight -= 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + 10 * zoom + else: + drawAreaHeight -= 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom + if self.homologyChecked: + drawAreaHeight -= self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + if self.geneChecked: + drawAreaHeight -= self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + else: + if self.selectedChr > -1: + drawAreaHeight -= 20 + else: + drawAreaHeight -= 30 + +# BEGIN HaplotypeAnalyst + if self.haplotypeAnalystChecked and self.selectedChr > -1: + drawAreaHeight -= self.EACH_GENE_HEIGHT * \ + (self.NR_INDIVIDUALS + 10) * 2 * zoom +# END HaplotypeAnalyst + + if zoom == 2: + drawAreaHeight -= 60 + + # Image map + gifmap = HtmlGenWrapper.create_map_tag(name="WebQTLImageMap") + + newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) + # Draw the alternating-color background first and get plotXScale + plotXScale = self.drawGraphBackground( + canvas, gifmap, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + + # draw bootstap + if self.bootChecked and not self.multipleInterval: + self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + + # Draw clickable region and gene band if selected + if self.plotScale == 'physic' and self.selectedChr > -1: + self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) + + if self.geneChecked and self.geneCol: + self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) + if self.homologyChecked and self.homology: + if self.geneChecked and self.geneCol: + yTopOffset = newoffset[3] + self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + 10 + self.drawHomologyBand(canvas, gifmap, plotXScale, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), + zoom=zoom, startMb=startMb, endMb=endMb) + if self.SNPChecked: + self.drawSNPTrackNew( + canvas, offset=newoffset, zoom=2 * zoom, startMb=startMb, endMb=endMb) +# BEGIN HaplotypeAnalyst + if self.haplotypeAnalystChecked: + self.drawHaplotypeBand( + canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) +# END HaplotypeAnalyst + # Draw X axis + self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + # Draw QTL curve + self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + + # draw legend + if self.multipleInterval: + self.drawMultiTraitName( + fd, canvas, gifmap, showLocusForm, offset=newoffset) + elif self.legendChecked: + self.drawLegendPanel(canvas, offset=newoffset, zoom=zoom) + else: + pass + + # draw position, no need to use a separate function + self.drawProbeSetPosition( + canvas, plotXScale, offset=newoffset, zoom=zoom) + + return gifmap + + def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + bootHeightThresh = drawAreaHeight * 3 / 4 + + # break bootstrap result into groups + BootCoord = [] + i = 0 + previous_chr = None + previous_chr_as_int = 0 + startX = xLeftOffset + + BootChrCoord = [] + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes + for i, result in enumerate(self.qtlresults): + if result['chr'] != previous_chr: + previous_chr = result['chr'] + previous_chr_as_int += 1 + if previous_chr_as_int != 1: + BootCoord.append(BootChrCoord) + BootChrCoord = [] + startX += ( + self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval) * plotXScale + if self.plotScale == 'physic': + Xc = startX + (result['Mb'] - self.startMb) * plotXScale + else: + Xc = startX + \ + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale + BootChrCoord.append([Xc, self.bootResult[i]]) + else: + for i, result in enumerate(self.qtlresults): + if str(result['chr']) == str(self.ChrList[self.selectedChr][0]): + if self.plotScale == 'physic': + Xc = startX + (result['Mb'] - \ + self.startMb) * plotXScale + else: + Xc = startX + \ + (result['cM'] - self.qtlresults[0] + ['cM']) * plotXScale + BootChrCoord.append([Xc, self.bootResult[i]]) + BootCoord = [BootChrCoord] + + # reduce bootResult + if self.selectedChr > -1: + maxBootBar = 80.0 + else: + maxBootBar = 200.0 + stepBootStrap = plotWidth / maxBootBar + reducedBootCoord = [] + maxBootCount = 0 + + for BootChrCoord in BootCoord: + nBoot = len(BootChrCoord) + bootStartPixX = BootChrCoord[0][0] + bootCount = BootChrCoord[0][1] + for i in range(1, nBoot): + if BootChrCoord[i][0] - bootStartPixX < stepBootStrap: + bootCount += BootChrCoord[i][1] + continue + else: + if maxBootCount < bootCount: + maxBootCount = bootCount + # end if + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[i][0], bootCount]) + bootStartPixX = BootChrCoord[i][0] + bootCount = BootChrCoord[i][1] + # end else + # end for + # add last piece + if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap / 2.0: + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[-1][0], bootCount]) + else: + reducedBootCoord[-1][2] += bootCount + reducedBootCoord[-1][1] = BootChrCoord[-1][0] + # end else + if maxBootCount < reducedBootCoord[-1][2]: + maxBootCount = reducedBootCoord[-1][2] + # end if + for item in reducedBootCoord: + if item[2] > 0: + if item[0] < xLeftOffset: + item[0] = xLeftOffset + if item[0] > xLeftOffset + plotWidth: + item[0] = xLeftOffset + plotWidth + if item[1] < xLeftOffset: + item[1] = xLeftOffset + if item[1] > xLeftOffset + plotWidth: + item[1] = xLeftOffset + plotWidth + if item[0] != item[1]: + im_drawer.rectangle( + xy=((item[0], yZero), + (item[1], yZero - item[2] * bootHeightThresh / maxBootCount)), + fill=self.BOOTSTRAP_BOX_COLOR, outline=BLACK) + + if maxBootCount == 0: + return + + # draw boot scale + highestPercent = (maxBootCount * 100.0) / nboot + bootScale = Plot.detScale(0, highestPercent) + bootScale = Plot.frange( + bootScale[0], bootScale[1], bootScale[1] / bootScale[2]) + bootScale = bootScale[:-1] + [highestPercent] + + bootOffset = 50 * fontZoom + bootScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=13 * fontZoom) + im_drawer.rectangle( + xy=((canvas.size[0] - bootOffset, yZero - bootHeightThresh), + (canvas.size[0] - bootOffset - 15 * zoom, yZero)), + fill=YELLOW, outline=BLACK) + im_drawer.line( + xy=((canvas.size[0] - bootOffset + 4, yZero), + (canvas.size[0] - bootOffset, yZero)), + fill=BLACK) + TEXT_Y_DISPLACEMENT = -8 + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, yZero + TEXT_Y_DISPLACEMENT), text='0%', + font=bootScaleFont, fill=BLACK) + + for item in bootScale: + if item == 0: + continue + bootY = yZero - bootHeightThresh * item / highestPercent + im_drawer.line( + xy=((canvas.size[0] - bootOffset + 4, bootY), + (canvas.size[0] - bootOffset, bootY)), + fill=BLACK) + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, bootY + TEXT_Y_DISPLACEMENT), + text='%2.1f' % item, font=bootScaleFont, fill=BLACK) + + if self.legendChecked: + if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): + startPosY = 30 + else: + startPosY = 15 + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) + leftOffset = canvas.size[0] - xRightOffset - 190 + im_drawer.rectangle( + xy=((leftOffset, startPosY - 6), + (leftOffset + 12, startPosY + 6)), + fill=YELLOW, outline=BLACK) + im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY + TEXT_Y_DISPLACEMENT), + text='Frequency of the Peak LRS', + font=smallLabelFont, fill=BLACK) + + def drawProbeSetPosition(self, canvas, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + if len(self.traitList) != 1: + return + + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + try: + Chr = self.traitList[0].chr + Mb = self.traitList[0].mb + except: + return + + previous_chr = 1 + previous_chr_as_int = 0 + if self.plotScale == "physic": + this_chr = str(self.ChrList[self.selectedChr][0]) + else: + this_chr = str(self.ChrList[self.selectedChr][1] + 1) + + if self.plotScale == 'physic': + if self.selectedChr > -1: + if this_chr != Chr or Mb < self.startMb or Mb > self.endMb: + return + else: + locPixel = xLeftOffset + (Mb - self.startMb) * plotXScale + else: + locPixel = xLeftOffset + for i, _chr in enumerate(self.ChrList[1:]): + if _chr[0] != Chr: + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale + else: + locPixel += Mb * plotXScale + break + else: + if self.selectedChr > -1: + for i, qtlresult in enumerate(self.qtlresults): + if qtlresult['chr'] != self.selectedChr: + continue + + if i == 0 and qtlresult['Mb'] >= Mb: + locPixel = -1 + break + + # the trait's position is between two traits + if i > 0 and self.qtlresults[i - 1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: + locPixel = xLeftOffset + plotXScale * (self.qtlresults[i - 1]['Mb'] + (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb']) * ( + Mb - self.qtlresults[i - 1]['Mb']) / (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb'])) + break + + # the trait's position is on the right of the last genotype + if i == len(self.qtlresults) and Mb >= qtlresult['Mb']: + locPixel = -1 + else: + locPixel = xLeftOffset + for i, _chr in enumerate(self.ChrList): + if i < (len(self.ChrList) - 1): + if _chr != Chr: + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale + else: + locPixel += (Mb * (_chr[-1].cM - _chr[0].cM) / \ + self.ChrLengthCMList[i]) * plotXScale + break + if locPixel >= 0 and self.plotScale == 'physic': + traitPixel = ((locPixel, yZero), (locPixel - 7, + yZero + 14), (locPixel + 7, yZero + 14)) + draw_open_polygon(canvas, xy=traitPixel, outline=BLACK, + fill=self.TRANSCRIPT_LOCATION_COLOR) + + def drawSNPTrackNew(self, canvas, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + if self.plotScale != 'physic' or self.selectedChr == -1 or not self.diffCol: + return + + SNP_HEIGHT_MODIFIER = 18.0 + + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + drawSNPLocationY = yTopOffset + plotHeight + #chrName = self.genotype[0].name + chrName = self.ChrList[self.selectedChr][0] + + stepMb = (endMb - startMb) / plotWidth + strainId1, strainId2 = self.diffCol + SNPCounts = [] + + while startMb < endMb: + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + # snp count + cursor.execute("SELECT COUNT(*) FROM BXDSnpPosition " + "WHERE Chr = %s AND Mb >= %s AND Mb < %s AND " + "StrainId1 = %s AND StrainId2 = %s", + (chrName, f"{startMb:2.6f}", + f"{startMb + stepMb:2.6f}", strainId1, strainId2,)) + SNPCounts.append(cursor.fetchone()[0]) + startMb += stepMb + + if (len(SNPCounts) > 0): + maxCount = max(SNPCounts) + if maxCount > 0: + for i in range(xLeftOffset, xLeftOffset + plotWidth): + snpDensity = float( + SNPCounts[i - xLeftOffset] * SNP_HEIGHT_MODIFIER / maxCount) + im_drawer.line( + xy=((i, drawSNPLocationY + (snpDensity) * zoom), + (i, drawSNPLocationY - (snpDensity) * zoom)), + fill=self.SNP_COLOR, width=1) + + def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset=(40, 120, 80, 10), zoom=1): + nameWidths = [] + yPaddingTop = 10 + colorFont = ImageFont.truetype(font=TREBUC_FILE, size=12) + if len(self.qtlresults) > 20 and self.selectedChr > -1: + rightShift = 20 + rightShiftStep = 60 + rectWidth = 10 + else: + rightShift = 40 + rightShiftStep = 80 + rectWidth = 15 + + for k, thisTrait in enumerate(self.traitList): + thisLRSColor = self.colorCollection[k] + kstep = k % 4 + if k != 0 and kstep == 0: + if nameWidths: + rightShiftStep = max(nameWidths[-4:]) + rectWidth + 20 + rightShift += rightShiftStep + + name = thisTrait.displayName() + nameWidth, nameHeight = im_drawer.textsize(name, font=colorFont) + nameWidths.append(nameWidth) + + im_drawer.rectangle( + xy=((rightShift, yPaddingTop + kstep * 15), + (rectWidth + rightShift, yPaddingTop + 10 + kstep * 15)), + fill=thisLRSColor, outline=BLACK) + im_drawer.text( + text=name, xy=(rectWidth + 2 + rightShift, + yPaddingTop + 10 + kstep * 15), + font=colorFont, fill=BLACK) + if thisTrait.db: + COORDS = "%d,%d,%d,%d" % (rectWidth + 2 + rightShift, yPaddingTop + kstep * \ + 15, rectWidth + 2 + rightShift + nameWidth, yPaddingTop + 10 + kstep * 15,) + HREF = "javascript:showDatabase3('%s','%s','%s','');" % ( + showLocusForm, thisTrait.db.name, thisTrait.name) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', coords=COORDS, href=HREF) + gifmap.append(Areas) # TODO + + def drawLegendPanel(self, canvas, offset=(40, 120, 80, 10), zoom=1): + im_drawer = ImageDraw.Draw(canvas) + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + TEXT_Y_DISPLACEMENT = -8 + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + labelFont = ImageFont.truetype(font=TREBUC_FILE, size=12 * fontZoom) + startPosY = 15 + stepPosY = 12 * fontZoom + + startPosX = canvas.size[0] - xRightOffset - 415 + if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): + startPosY = 15 + nCol = 2 + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) + + leftOffset = canvas.size[0] - xRightOffset - 190 + draw_open_polygon( + canvas, + xy=( + (leftOffset + 6, startPosY - 7), + (leftOffset - 1, startPosY + 7), + (leftOffset + 13, startPosY + 7)), + outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR + ) + TEXT_Y_DISPLACEMENT = -8 + im_drawer.text( + text="Sequence Site", + xy=(leftOffset + 20, startPosY + TEXT_Y_DISPLACEMENT), font=smallLabelFont, + fill=self.TOP_RIGHT_INFO_COLOR) + + if self.manhattan_plot != True: + im_drawer.line( + xy=((startPosX, startPosY), (startPosX + 32, startPosY)), + fill=self.LRS_COLOR, width=2) + im_drawer.text( + text=self.LRS_LOD, xy=( + startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), + font=labelFont, fill=BLACK) + startPosY += stepPosY + + if self.additiveChecked: + im_drawer.line( + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=2) + im_drawer.line( + xy=((startPosX + 18, startPosY), (startPosX + 32, startPosY)), + fill=self.ADDITIVE_COLOR_NEGATIVE, width=2) + im_drawer.text( + text='Additive Effect', xy=(startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), + font=labelFont, fill=BLACK) + startPosY += stepPosY + + if self.genotype.type == 'intercross' and self.dominanceChecked: + im_drawer.line( + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), + fill=self.DOMINANCE_COLOR_POSITIVE, width=4) + im_drawer.line( + xy=((startPosX + 18, startPosY), (startPosX + 35, startPosY)), + fill=self.DOMINANCE_COLOR_NEGATIVE, width=4) + im_drawer.text( + text='Dominance Effect', xy=(startPosX + 42, startPosY + 5), + font=labelFont, fill=BLACK) + startPosY += stepPosY + + if self.haplotypeAnalystChecked: + im_drawer.line( + xy=((startPosX - 34, startPosY), (startPosX - 17, startPosY)), + fill=self.HAPLOTYPE_POSITIVE, width=4) + im_drawer.line( + xy=((startPosX - 17, startPosY), (startPosX, startPosY)), + fill=self.HAPLOTYPE_NEGATIVE, width=4) + im_drawer.line( + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), + fill=self.HAPLOTYPE_HETEROZYGOUS, width=4) + im_drawer.line( + xy=((startPosX + 17, startPosY), (startPosX + 34, startPosY)), + fill=self.HAPLOTYPE_RECOMBINATION, width=4) + im_drawer.text( + text='Haplotypes (Pat, Mat, Het, Unk)', + xy=(startPosX + 41, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + startPosY += stepPosY + + if self.permChecked and self.nperm > 0: + thisStartX = startPosX + if self.multipleInterval and not self.bootChecked: + thisStartX = canvas.size[0] - xRightOffset - 205 + im_drawer.line( + xy=((thisStartX, startPosY), (startPosX + 32, startPosY)), + fill=self.SIGNIFICANT_COLOR, width=self.SIGNIFICANT_WIDTH) + im_drawer.line( + xy=((thisStartX, startPosY + stepPosY), + (startPosX + 32, startPosY + stepPosY)), + fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH) + im_drawer.text( + text='Significant %s = %2.2f' % ( + self.LRS_LOD, self.significant), + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + im_drawer.text( + text='Suggestive %s = %2.2f' % (self.LRS_LOD, self.suggestive), + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT + stepPosY), font=labelFont, + fill=BLACK) + + labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12 * fontZoom) + labelColor = BLACK + + if self.dataset.type == "Publish" or self.dataset.type == "Geno": + dataset_label = self.dataset.fullname + else: + dataset_label = "%s - %s" % (self.dataset.group.name, + self.dataset.fullname) + + + self.current_datetime = datetime.datetime.now().strftime("%b %d %Y %H:%M:%S") + string1 = 'UTC Timestamp: %s' % (self.current_datetime) + string2 = 'Dataset: %s' % (dataset_label) + string3 = 'Trait Hash: %s' % (self.vals_hash) + + if self.genofile_string == "": + string4 = 'Genotype File: %s.geno' % self.dataset.group.name + else: + string4 = 'Genotype File: %s' % self.genofile_string.split(":")[1] + + string6 = '' + if self.mapping_method == "gemma" or self.mapping_method == "gemma_bimbam": + if self.use_loco == "True": + string5 = 'Using GEMMA mapping method with LOCO and ' + else: + string5 = 'Using GEMMA mapping method with ' + if self.covariates != "": + string5 += 'the cofactors below:' + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) + string6 = cofactor_names + else: + string5 += 'no cofactors' + elif self.mapping_method == "rqtl_plink" or self.mapping_method == "rqtl_geno": + string5 = 'Using R/qtl mapping method with ' + if self.covariates != "": + string5 += 'the cofactors below:' + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) + string6 = cofactor_names + elif self.controlLocus and self.doControl != "false": + string5 += '%s as control' % self.controlLocus + else: + string5 += 'no cofactors' + else: + string5 = 'Using Haldane mapping function with ' + if self.controlLocus and self.doControl != "false": + string5 += '%s as control' % self.controlLocus + else: + string5 += 'no control for other QTLs' + + y_constant = 10 + if self.this_trait.name: + if self.selectedChr == -1: + identification = "Mapping on All Chromosomes for " + else: + identification = "Mapping on Chromosome %s for " % ( + self.ChrList[self.selectedChr][0]) + + if self.this_trait.symbol: + identification += "Trait: %s - %s" % ( + self.this_trait.display_name, self.this_trait.symbol) + elif self.dataset.type == "Publish": + if self.this_trait.post_publication_abbreviation: + identification += "Trait: %s - %s" % ( + self.this_trait.display_name, self.this_trait.post_publication_abbreviation) + elif self.this_trait.pre_publication_abbreviation: + identification += "Trait: %s - %s" % ( + self.this_trait.display_name, self.this_trait.pre_publication_abbreviation) + else: + identification += "Trait: %s" % (self.this_trait.display_name) + else: + identification += "Trait: %s" % (self.this_trait.display_name) + identification += " with %s samples" % (self.n_samples) + + d = 4 + max( + im_drawer.textsize(identification, font=labelFont)[0], + im_drawer.textsize(string1, font=labelFont)[0], + im_drawer.textsize(string2, font=labelFont)[0], + im_drawer.textsize(string3, font=labelFont)[0], + im_drawer.textsize(string4, font=labelFont)[0]) + im_drawer.text( + text=identification, + xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, + fill=labelColor) + y_constant += 15 + else: + d = 4 + max( + im_drawer.textsize(string1, font=labelFont)[0], + im_drawer.textsize(string2, font=labelFont)[0], + im_drawer.textsize(string3, font=labelFont)[0], + im_drawer.textsize(string4, font=labelFont)[0]) + + if len(self.transform) > 0: + transform_text = "Transform - " + if self.transform == "qnorm": + transform_text += "Quantile Normalized" + elif self.transform == "log2" or self.transform == "log10": + transform_text += self.transform.capitalize() + elif self.transform == "sqrt": + transform_text += "Square Root" + elif self.transform == "zscore": + transform_text += "Z-Score" + elif self.transform == "invert": + transform_text += "Invert +/-" + + im_drawer.text( + text=transform_text, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + y_constant += 15 + im_drawer.text( + text=string1, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + y_constant += 15 + im_drawer.text( + text=string2, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + y_constant += 15 + im_drawer.text( + text=string3, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + y_constant += 15 + im_drawer.text( + text=string4, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + y_constant += 15 + if string4 != '': + im_drawer.text( + text=string5, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + y_constant += 15 + if string5 != '': + im_drawer.text( + text=string6, xy=(xLeftOffset, y_constant * fontZoom), + font=labelFont, fill=labelColor) + + def drawHomologyBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + if self.plotScale != 'physic' or self.selectedChr == -1 or not self.homology: + return + + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + yPaddingTop = yTopOffset + + for index, homology_dict in enumerate(self.homology): + ref_strand = homology_dict["ref_strand"] + ref_start = homology_dict["ref_start"] + ref_end = homology_dict["ref_end"] + query_chr = homology_dict["query_chr"] + query_strand = homology_dict["query_strand"] + query_start = homology_dict["query_start"] + query_end = homology_dict["query_end"] + + geneLength = (ref_end - ref_start) * 1000.0 + tenPercentLength = geneLength * 0.0001 + + geneStartPix = xLeftOffset + \ + plotXScale * (float(ref_start) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(ref_end) - startMb) # at least one pixel + + if (geneEndPix < xLeftOffset): + return # this gene is not on the screen + if (geneEndPix > xLeftOffset + plotWidth): + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene + if (geneStartPix > xLeftOffset + plotWidth): + return # we are outside the valid on-screen range, so stop drawing genes + elif (geneStartPix < xLeftOffset): + geneStartPix = xLeftOffset # clip the first in-range gene + + myColor = BLACK + + outlineColor = myColor + fillColor = myColor + + TITLE = f"hg38: Chr {query_chr} from {query_start:.3f} to {query_end:.3f} Mb" + HREF = f"http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38&position=chr{query_chr}:{int(query_start * 1000000)}-{int(query_end * 1000000)}" + + # Draw Genes + geneYLocation = yPaddingTop + \ + (index % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + else: + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + + # draw the detail view + utrColor = ImageColor.getrgb("rgb(66%, 66%, 66%)") + arrowColor = ImageColor.getrgb("rgb(70%, 70%, 70%)") + + # draw the line that runs the entire length of the gene + im_drawer.line( + xy=( + (geneStartPix, geneYLocation + \ + self.EACH_GENE_HEIGHT / 2 * zoom), + (geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 * zoom)), + fill=outlineColor, width=1) + + # draw the arrows + if geneEndPix - geneStartPix < 1: + genePixRange = 1 + else: + genePixRange = int(geneEndPix - geneStartPix) + for xCoord in range(0, genePixRange): + + if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix - geneStartPix) or xCoord == 0: + if query_strand == "+": + im_drawer.line( + xy=((geneStartPix + xCoord, geneYLocation), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + im_drawer.line( + xy=((geneStartPix + xCoord, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + else: + im_drawer.line( + xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + im_drawer.line( + xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) + + gifmap.append( + HtmlGenWrapper.create_area_tag( + shape='rect', + coords=COORDS, + href=HREF, + title=TITLE, + target="_blank")) + + def drawGeneBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: + return + + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + yPaddingTop = yTopOffset + + for gIndex, theGO in enumerate(self.geneCol): + geneNCBILink = 'http://www.ncbi.nlm.nih.gov/gene?term=%s' + if self.dataset.group.species == "mouse": + exonStarts = [] + exonEnds = [] + txStart = theGO["TxStart"] + txEnd = theGO["TxEnd"] + geneLength = (txEnd - txStart) * 1000.0 + tenPercentLength = geneLength * 0.0001 + SNPdensity = theGO["snpCount"] / geneLength + + if theGO['exonStarts']: + exonStarts = list( + map(float, theGO['exonStarts'].split(",")[:-1])) + exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) + cdsStart = theGO['cdsStart'] + cdsEnd = theGO['cdsEnd'] + accession = theGO['NM_ID'] + geneSymbol = theGO["GeneSymbol"] + strand = theGO["Strand"] + exonCount = theGO["exonCount"] + + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel + + if (geneEndPix < xLeftOffset): + return # this gene is not on the screen + elif (geneEndPix > xLeftOffset + plotWidth): + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene + if (geneStartPix > xLeftOffset + plotWidth): + return # we are outside the valid on-screen range, so stop drawing genes + elif (geneStartPix < xLeftOffset): + geneStartPix = xLeftOffset # clip the first in-range gene + + # color the gene based on SNP density + # found earlier, needs to be recomputed as snps are added + # always apply colors now, even if SNP Track not checked - Zach 11/24/2010 + + densities = [1.0000000000000001e-05, 0.094094033555233408, + 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] + if SNPdensity < densities[0]: + myColor = BLACK + elif SNPdensity < densities[1]: + myColor = PURPLE + elif SNPdensity < densities[2]: + myColor = DARKBLUE + elif SNPdensity < densities[3]: + myColor = DARKGREEN + elif SNPdensity < densities[4]: + myColor = GOLD + elif SNPdensity < densities[5]: + myColor = DARKORANGE + else: + myColor = DARKRED + + outlineColor = myColor + fillColor = myColor + + TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % ( + geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) + # NL: 06-02-2011 Rob required to change this link for gene related + HREF = geneNCBILink % geneSymbol + + elif self.dataset.group.species == "rat": + exonStarts = [] + exonEnds = [] + txStart = theGO["TxStart"] + txEnd = theGO["TxEnd"] + cdsStart = theGO["TxStart"] + cdsEnd = theGO["TxEnd"] + geneSymbol = theGO["GeneSymbol"] + strand = theGO["Strand"] + exonCount = 0 + + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel + + if (geneEndPix < xLeftOffset): + return # this gene is not on the screen + elif (geneEndPix > xLeftOffset + plotWidth): + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene + if (geneStartPix > xLeftOffset + plotWidth): + return # we are outside the valid on-screen range, so stop drawing genes + elif (geneStartPix < xLeftOffset): + geneStartPix = xLeftOffset # clip the first in-range gene + + outlineColor = DARKBLUE + fillColor = DARKBLUE + TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % ( + geneSymbol, float(txStart), float(txEnd), strand) + # NL: 06-02-2011 Rob required to change this link for gene related + HREF = geneNCBILink % geneSymbol + else: + outlineColor = ORANGE + fillColor = ORANGE + TITLE = "Gene: %s" % geneSymbol + + # Draw Genes + geneYLocation = yPaddingTop + \ + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + else: + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + + # draw the detail view + if self.endMb - self.startMb <= self.DRAW_DETAIL_MB and geneEndPix - geneStartPix > self.EACH_GENE_ARROW_SPACING * 3: + utrColor = ImageColor.getrgb("rgb(66%, 66%, 66%)") + arrowColor = ImageColor.getrgb("rgb(70%, 70%, 70%)") + + # draw the line that runs the entire length of the gene + im_drawer.line( + xy=( + (geneStartPix, geneYLocation + \ + self.EACH_GENE_HEIGHT / 2 * zoom), + (geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 * zoom)), + fill=outlineColor, width=1) + + # draw the arrows + if geneEndPix - geneStartPix < 1: + genePixRange = 1 + else: + genePixRange = int(geneEndPix - geneStartPix) + for xCoord in range(0, genePixRange): + + if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix - geneStartPix) or xCoord == 0: + if strand == "+": + im_drawer.line( + xy=((geneStartPix + xCoord, geneYLocation), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + im_drawer.line( + xy=((geneStartPix + xCoord, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + else: + im_drawer.line( + xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + im_drawer.line( + xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), + fill=arrowColor, width=1) + + # draw the blocks for the exon regions + for i in range(0, len(exonStarts)): + exonStartPix = ( + exonStarts[i] - startMb) * plotXScale + xLeftOffset + exonEndPix = (exonEnds[i] - startMb) * \ + plotXScale + xLeftOffset + if (exonStartPix < xLeftOffset): + exonStartPix = xLeftOffset + if (exonEndPix < xLeftOffset): + exonEndPix = xLeftOffset + if (exonEndPix > xLeftOffset + plotWidth): + exonEndPix = xLeftOffset + plotWidth + if (exonStartPix > xLeftOffset + plotWidth): + exonStartPix = xLeftOffset + plotWidth + im_drawer.rectangle( + xy=((exonStartPix, geneYLocation), + (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) + + # draw gray blocks for 3' and 5' UTR blocks + if cdsStart and cdsEnd: + utrStartPix = (txStart - startMb) * \ + plotXScale + xLeftOffset + utrEndPix = (cdsStart - startMb) * plotXScale + xLeftOffset + if (utrStartPix < xLeftOffset): + utrStartPix = xLeftOffset + if (utrEndPix < xLeftOffset): + utrEndPix = xLeftOffset + if (utrEndPix > xLeftOffset + plotWidth): + utrEndPix = xLeftOffset + plotWidth + if (utrStartPix > xLeftOffset + plotWidth): + utrStartPix = xLeftOffset + plotWidth + + if self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: + if strand == "-": + labelText = "3'" + else: + labelText = "5'" + im_drawer.text( + text=labelText, + xy=(utrStartPix - 9, geneYLocation + \ + self.EACH_GENE_HEIGHT), + font=ImageFont.truetype(font=ARIAL_FILE, size=2)) + + # the second UTR region + + utrStartPix = (cdsEnd - startMb) * plotXScale + xLeftOffset + utrEndPix = (txEnd - startMb) * plotXScale + xLeftOffset + if (utrStartPix < xLeftOffset): + utrStartPix = xLeftOffset + if (utrEndPix < xLeftOffset): + utrEndPix = xLeftOffset + if (utrEndPix > xLeftOffset + plotWidth): + utrEndPix = xLeftOffset + plotWidth + if (utrStartPix > xLeftOffset + plotWidth): + utrStartPix = xLeftOffset + plotWidth + + if self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: + if strand == "-": + labelText = "5'" + else: + labelText = "3'" + im_drawer.text( + text=labelText, + xy=(utrEndPix + 2, geneYLocation + \ + self.EACH_GENE_HEIGHT), + font=ImageFont.truetype(font=ARIAL_FILE, size=2)) + + # draw the genes as rectangles + else: + im_drawer.rectangle( + xy=((geneStartPix, geneYLocation), + (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) + + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) + # NL: 06-02-2011 Rob required to display NCBI info in a new window + gifmap.append( + HtmlGenWrapper.create_area_tag( + shape='rect', + coords=COORDS, + href=HREF, + title=TITLE, + target="_blank")) + +# BEGIN HaplotypeAnalyst + def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: + return + + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + + yPaddingTop = yTopOffset + + thisTrait = self.this_trait + + samplelist = list(self.genotype.prgy) + + smd = [] + for sample in self.sample_vals_dict.keys(): + if self.sample_vals_dict[sample] != "x" and sample in samplelist: + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) + smd.append(temp) + else: + continue + + smd.sort(key=lambda A: A.value) + smd.reverse() + + oldgeneEndPix = -1 + # Initializing plotRight, error before + plotRight = xRightOffset + + im_drawer = ImageDraw.Draw(canvas) + +# find out PlotRight + for _chr in self.genotype: + if _chr.name == self.ChrList[self.selectedChr][0]: + for i, _locus in enumerate(_chr): + txStart = _chr[i].Mb + txEnd = _chr[i].Mb + + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) - 0 + + drawit = 1 + if (geneStartPix < xLeftOffset): + drawit = 0 + if (geneStartPix > xLeftOffset + plotWidth): + drawit = 0 + + if drawit == 1: + if _chr[i].name != " - ": + plotRight = geneEndPix + 4 + +# end find out PlotRight + + firstGene = 1 + lastGene = 0 + + # Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" + # was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. + # Now there should always be some value set for "oldgeno" - Zach 12/14/2010 + oldgeno = [None] * len(self.strainlist) + + for i, _chr in enumerate(self.genotype): + if _chr.name == self.ChrList[self.selectedChr][0]: + for j, _locus in enumerate(_chr): + txStart = _chr[j].Mb + txEnd = _chr[j].Mb + + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) + 0 + + if oldgeneEndPix >= xLeftOffset: + drawStart = oldgeneEndPix + 4 + else: + drawStart = xLeftOffset + 3 + + drawEnd = plotRight - 9 + + drawit = 1 + + if (geneStartPix < xLeftOffset): + if firstGene == 1: + drawit = 1 + else: + drawit = 0 + + elif (geneStartPix > (xLeftOffset + plotWidth - 3)): + if lastGene == 0: + drawit = 1 + drawEnd = xLeftOffset + plotWidth - 6 + lastGene = 1 + else: + break + + else: + firstGene = 0 + drawit = 1 + + if drawit == 1: + myColor = DARKBLUE + outlineColor = myColor + fillColor = myColor + + maxind = 0 + + # Draw Genes + + geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * \ + (self.EACH_GENE_HEIGHT) * zoom + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + else: + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + + if _chr[j].name != " - ": + + if (firstGene == 1) and (lastGene != 1): + oldgeneEndPix = drawStart = xLeftOffset + oldgeno = _chr[j].genotype + continue + + for k, _geno in enumerate(_chr[j].genotype): + plotbxd = 0 + if samplelist[k] in [item.name for item in smd]: + plotbxd = 1 + + if (plotbxd == 1): + ind = 0 + if samplelist[k] in [item.name for item in smd]: + ind = [item.name for item in smd].index( + samplelist[k]) + + maxind = max(ind, maxind) + + # lines + if (oldgeno[k] == -1 and _geno == -1): + mylineColor = self.HAPLOTYPE_NEGATIVE + elif (oldgeno[k] == 1 and _geno == 1): + mylineColor = self.HAPLOTYPE_POSITIVE + elif (oldgeno[k] == 0 and _geno == 0): + mylineColor = self.HAPLOTYPE_HETEROZYGOUS + else: + mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown + + im_drawer.line( + xy=((drawStart, + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + (drawEnd, + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) + + fillColor = BLACK + outlineColor = BLACK + if lastGene == 0: + im_drawer.rectangle( + xy=((geneStartPix, + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + (geneEndPix, + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT + 2 * self.EACH_GENE_HEIGHT * zoom)), + outline=outlineColor, fill=fillColor) + + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation + ind * self.EACH_GENE_HEIGHT, geneEndPix + 1, (geneYLocation + ind * self.EACH_GENE_HEIGHT)) + TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % ( + samplelist[k], _chr[j].name, float(txStart)) + HREF = '' + gifmap.append( + HtmlGenWrapper.create_area_tag( + shape='rect', + coords=COORDS, + href=HREF, + title=TITLE)) + + # if there are no more markers in a chromosome, the plotRight value calculated above will be before the plotWidth + # resulting in some empty space on the right side of the plot area. This draws an "unknown" bar from plotRight to the edge. + if (plotRight < (xLeftOffset + plotWidth - 3)) and (lastGene == 0): + drawEnd = xLeftOffset + plotWidth - 6 + mylineColor = self.HAPLOTYPE_RECOMBINATION + im_drawer.line( + xy=((plotRight, + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + (drawEnd, + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) + + if lastGene == 0: + draw_rotated_text( + canvas, text="%s" % (_chr[j].name), + font=ImageFont.truetype(font=VERDANA_FILE, + size=12), + xy=(geneStartPix, + geneYLocation + 17 + 2 * maxind * self.EACH_GENE_HEIGHT * zoom), + fill=BLACK, angle=-90) + + oldgeneEndPix = geneEndPix + oldgeno = _chr[j].genotype + firstGene = 0 + else: + lastGene = 0 + + for i, _chr in enumerate(self.genotype): + if _chr.name == self.ChrList[self.selectedChr][0]: + for j, _geno in enumerate(_chr[1].genotype): + + plotbxd = 0 + if samplelist[j] in [item.name for item in smd]: + plotbxd = 1 + + if (plotbxd == 1): + ind = [item.name for item in smd].index( + samplelist[j]) - 1 + expr = smd[ind+1].value + + # Place where font is hardcoded + im_drawer.text( + text="%s" % (samplelist[j]), + xy=((xLeftOffset + plotWidth + 10), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), + fill=BLACK) + im_drawer.text( + text="%2.2f" % (expr), + xy=((xLeftOffset + plotWidth + 60), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), + fill=BLACK) + +# END HaplotypeAnalyst + + def drawClickBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + if self.plotScale != 'physic' or self.selectedChr == -1: + return + + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + # only draw this many clickable regions (if you set it higher, you get more precision in clicking, + # but it makes the HTML huge, and takes forever to render the page in the first place) + # Draw the bands that you can click on to go to UCSC / Ensembl + MAX_CLICKABLE_REGION_DIVISIONS = 100 + clickableRegionLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=9) + pixelStep = max( + 5, int(float(plotWidth) / MAX_CLICKABLE_REGION_DIVISIONS)) + # pixelStep: every N pixels, we make a new clickable area for the user to go to that area of the genome. + + numBasesCurrentlyOnScreen = self.kONE_MILLION * \ + abs(startMb - endMb) # Number of bases on screen now + flankingWidthInBases = int( + min((float(numBasesCurrentlyOnScreen) / 2.0), (5 * self.kONE_MILLION))) + webqtlZoomWidth = numBasesCurrentlyOnScreen / 16.0 + # Flanking width should be such that we either zoom in to a 10 million base region, or we show the clicked region at the same scale as we are currently seeing. + + currentChromosome = self.genotype[0].name + i = 0 + + paddingTop = yTopOffset + phenogenPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + ucscPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 3 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + else: + ucscPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + + if zoom == 1: + for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep): + + calBase = self.kONE_MILLION * \ + (startMb + (endMb - startMb) * \ + (pixel - xLeftOffset - 0.0) / plotWidth) + + xBrowse1 = pixel + xBrowse2 = min(xLeftOffset + plotWidth, + (pixel + pixelStep - 1)) + + WEBQTL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, paddingTop, xBrowse2, (paddingTop + self.BAND_HEIGHT)) + WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max( + 0, (calBase - webqtlZoomWidth)) / 1000000.0, (calBase + webqtlZoomWidth) / 1000000.0) + + WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" + gifmap.append( + HtmlGenWrapper.create_area_tag( + shape='rect', + coords=WEBQTL_COORDS, + href=WEBQTL_HREF, + title=WEBQTL_TITLE)) + im_drawer.rectangle( + xy=((xBrowse1, paddingTop), + (xBrowse2, (paddingTop + self.BAND_HEIGHT))), + outline=self.CLICKABLE_WEBQTL_REGION_COLOR, + fill=self.CLICKABLE_WEBQTL_REGION_COLOR) + im_drawer.line( + xy=((xBrowse1, paddingTop), (xBrowse1, + (paddingTop + self.BAND_HEIGHT))), + fill=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR) + + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + PHENOGEN_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT)) + if self.dataset.group.species == "mouse": + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + else: + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + PHENOGEN_TITLE = "Click to view this section of the genome in PhenoGen" + gifmap.append( + HtmlGenWrapper.create_area_tag( + shape='rect', + coords=PHENOGEN_COORDS, + href=PHENOGEN_HREF, + title=PHENOGEN_TITLE)) + im_drawer.rectangle( + xy=((xBrowse1, phenogenPaddingTop), + (xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT))), + outline=self.CLICKABLE_PHENOGEN_REGION_COLOR, + fill=self.CLICKABLE_PHENOGEN_REGION_COLOR) + im_drawer.line( + xy=((xBrowse1, phenogenPaddingTop), (xBrowse1, + (phenogenPaddingTop + self.BAND_HEIGHT))), + fill=self.CLICKABLE_PHENOGEN_REGION_OUTLINE_COLOR) + + UCSC_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT)) + if self.dataset.group.species == "mouse": + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) + else: + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + UCSC_TITLE = "Click to view this section of the genome in the UCSC Genome Browser" + gifmap.append( + HtmlGenWrapper.create_area_tag( + shape='rect', + coords=UCSC_COORDS, + href=UCSC_HREF, + title=UCSC_TITLE)) + im_drawer.rectangle( + xy=((xBrowse1, ucscPaddingTop), + (xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT))), + outline=self.CLICKABLE_UCSC_REGION_COLOR, + fill=self.CLICKABLE_UCSC_REGION_COLOR) + im_drawer.line( + xy=((xBrowse1, ucscPaddingTop), + (xBrowse1, (ucscPaddingTop + self.BAND_HEIGHT))), + fill=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR) + + ENSEMBL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT)) + if self.dataset.group.species == "mouse": + ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + else: + ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + ENSEMBL_TITLE = "Click to view this section of the genome in the Ensembl Genome Browser" + gifmap.append(HtmlGenWrapper.create_area_tag( + shape='rect', + coords=ENSEMBL_COORDS, + href=ENSEMBL_HREF, + title=ENSEMBL_TITLE)) + im_drawer.rectangle( + xy=((xBrowse1, ensemblPaddingTop), + (xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT))), + outline=self.CLICKABLE_ENSEMBL_REGION_COLOR, + fill=self.CLICKABLE_ENSEMBL_REGION_COLOR) + im_drawer.line( + xy=((xBrowse1, ensemblPaddingTop), + (xBrowse1, (ensemblPaddingTop + self.BAND_HEIGHT))), + fill=self.CLICKABLE_ENSEMBL_REGION_OUTLINE_COLOR) + # end for + + im_drawer.text( + text="Click to view the corresponding section of the genome in an 8x expanded WebQTL map", + xy=((xLeftOffset + 10), paddingTop), # + self.BAND_HEIGHT/2), + font=clickableRegionLabelFont, + fill=self.CLICKABLE_WEBQTL_TEXT_COLOR) + if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": + im_drawer.text( + text="Click to view the corresponding section of the genome in PhenoGen", + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), phenogenPaddingTop), + font=clickableRegionLabelFont, fill=self.CLICKABLE_PHENOGEN_TEXT_COLOR) + im_drawer.text( + text="Click to view the corresponding section of the genome in the UCSC Genome Browser", + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ucscPaddingTop), + font=clickableRegionLabelFont, fill=self.CLICKABLE_UCSC_TEXT_COLOR) + im_drawer.text( + text="Click to view the corresponding section of the genome in the Ensembl Genome Browser", + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ensemblPaddingTop), + font=clickableRegionLabelFont, fill=self.CLICKABLE_ENSEMBL_TEXT_COLOR) + + # draw the gray text + chrFont = ImageFont.truetype( + font=VERDANA_BOLD_FILE, size=26 * zoom) + chrX = xLeftOffset + plotWidth - 2 - im_drawer.textsize( + "Chr %s" % self.ChrList[self.selectedChr][0], font=chrFont)[0] + im_drawer.text( + text="Chr %s" % self.ChrList[self.selectedChr][0], + xy=(chrX, paddingTop), font=chrFont, fill=GRAY) + # end of drawBrowserClickableRegions + else: + # draw the gray text + chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26 * zoom) + chrX = xLeftOffset + (plotWidth - im_drawer.textsize( + "Chr %s" % currentChromosome, font=chrFont)[0]) / 2 + im_drawer.text( + text="Chr %s" % currentChromosome, xy=(chrX, 32), font=chrFont, + fill=GRAY) + # end of drawBrowserClickableRegions + pass + + def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yZero = canvas.size[1] - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + # Parameters + NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks + X_MAJOR_TICK_THICKNESS = 3 + X_MINOR_TICK_THICKNESS = 1 + X_AXIS_THICKNESS = 1 * zoom + + # ======= Alex: Draw the X-axis labels (megabase location) + MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15 * zoom) + xMajorTickHeight = 10 * zoom # How high the tick extends below the axis + xMinorTickHeight = 5 * zoom + xAxisTickMarkColor = BLACK + xAxisLabelColor = BLACK + fontHeight = 12 * fontZoom # How tall the font that we're using is + spacingFromLabelToAxis = 10 + + if self.plotScale == 'physic': + strYLoc = yZero + MBLabelFont.font.height / 2 + # Physical single chromosome view + if self.selectedChr > -1: + XScale = Plot.detScale(startMb, endMb) + XStart, XEnd, XStep = XScale + if XStep < 8: + XStep *= 2 + spacingAmtX = spacingAmt = (XEnd - XStart) / XStep + + j = 0 + while abs(spacingAmtX - int(spacingAmtX)) >= spacingAmtX / 100.0 and j < 6: + j += 1 + spacingAmtX *= 10 + + formatStr = '%%2.%df' % j + + for counter, _Mb in enumerate(Plot.frange(XStart, XEnd, spacingAmt / NUM_MINOR_TICKS)): + if _Mb < startMb or _Mb > endMb: + continue + Xc = xLeftOffset + plotXScale * (_Mb - startMb) + if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark + im_drawer.line(xy=((Xc, yZero), + (Xc, yZero + xMajorTickHeight)), + fill=xAxisTickMarkColor, + width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark + # What Mbase location to put on the label + labelStr = str(formatStr % _Mb) + strWidth, strHeight = im_drawer.textsize( + labelStr, font=MBLabelFont) + drawStringXc = (Xc - (strWidth / 2.0)) + im_drawer.text(xy=(drawStringXc, strYLoc), + text=labelStr, font=MBLabelFont, + fill=xAxisLabelColor) + else: + im_drawer.line(xy=((Xc, yZero), + (Xc, yZero + xMinorTickHeight)), + fill=xAxisTickMarkColor, + width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark + + # Physical genome wide view + else: + distScale = 0 + startPosX = xLeftOffset + for i, distLen in enumerate(self.ChrLengthDistList): + if distScale == 0: # universal scale in whole genome mapping + if distLen > 75: + distScale = 25 + elif distLen > 30: + distScale = 10 + else: + distScale = 5 + for j, tickdists in enumerate(range(distScale, int(ceil(distLen)), distScale)): + im_drawer.line( + xy=((startPosX + tickdists * plotXScale, yZero), + (startPosX + tickdists * plotXScale, yZero + 7)), + fill=BLACK, width=1 * zoom) + if j % 2 == 0: + draw_rotated_text( + canvas, text=str(tickdists), font=MBLabelFont, + xy=(startPosX + tickdists * plotXScale, + yZero + 10 * zoom), fill=BLACK, angle=270) + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale + + megabaseLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + im_drawer.text( + text="Megabases", + xy=( + xLeftOffset + (plotWidth - im_drawer.textsize( + "Megabases", font=megabaseLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), + font=megabaseLabelFont, fill=BLACK) + pass + else: + strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height / 2 + ChrAInfo = [] + preLpos = -1 + distinctCount = 0.0 + + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes + for i, _chr in enumerate(self.genotype): + thisChr = [] + Locus0CM = _chr[0].cM + nLoci = len(_chr) + if nLoci <= 8: + for _locus in _chr: + if _locus.name != ' - ': + if _locus.cM != preLpos: + distinctCount += 1 + preLpos = _locus.cM + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) + else: + for j in (0, round(nLoci / 4), round(nLoci / 2), round(nLoci * 3 / 4), -1): + while _chr[j].name == ' - ': + j += 1 + if _chr[j].cM != preLpos: + distinctCount += 1 + preLpos = _chr[j].cM + thisChr.append( + [_chr[j].name, _chr[j].cM - Locus0CM]) + ChrAInfo.append(thisChr) + else: + for i, _chr in enumerate(self.genotype): + if _chr.name == self.ChrList[self.selectedChr][0]: + thisChr = [] + Locus0CM = _chr[0].cM + for _locus in _chr: + if _locus.name != ' - ': + if _locus.cM != preLpos: + distinctCount += 1 + preLpos = _locus.cM + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) + ChrAInfo.append(thisChr) + + stepA = (plotWidth + 0.0) / distinctCount + + LRectWidth = 10 + LRectHeight = 3 + offsetA = -stepA + lineColor = LIGHTBLUE + startPosX = xLeftOffset + + for j, ChrInfo in enumerate(ChrAInfo): + preLpos = -1 + for i, item in enumerate(ChrInfo): + Lname, Lpos = item + if Lpos != preLpos: + offsetA += stepA + differ = 1 + else: + differ = 0 + preLpos = Lpos + Lpos *= plotXScale + if self.selectedChr > -1: + Zorder = i % 5 + else: + Zorder = 0 + if differ: + im_drawer.line( + xy=((startPosX + Lpos, yZero), (xLeftOffset + offsetA,\ + yZero + 25)), + fill=lineColor) + im_drawer.line( + xy=((xLeftOffset + offsetA, yZero + 25), (xLeftOffset + offsetA,\ + yZero + 40 + Zorder * (LRectWidth + 3))), + fill=lineColor) + rectColor = ORANGE + else: + im_drawer.line( + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) - 3), (\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3))), + fill=lineColor) + rectColor = DEEPPINK + im_drawer.rectangle( + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3)), + (xLeftOffset + offsetA - LRectHeight, + yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth)), + outline=rectColor, fill=rectColor, width=0) + COORDS = "%d,%d,%d,%d" % (xLeftOffset + offsetA - LRectHeight, yZero + 40 + Zorder * (LRectWidth + 3),\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth) + HREF = "/show_trait?trait_id=%s&dataset=%s" % ( + Lname, self.dataset.group.name + "Geno") + #HREF="javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm,fd.RISet+"Geno", Lname) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', + coords=COORDS, + href=HREF, + target="_blank", + title="Locus : {}".format(Lname)) + gifmap.append(Areas) + # piddle bug + if j == 0: + im_drawer.line( + xy=((startPosX, yZero), (startPosX, yZero + 40)), + fill=lineColor) + startPosX += (self.ChrLengthDistList[j] + \ + self.GraphInterval) * plotXScale + + centimorganLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + im_drawer.text( + text="Centimorgans", + xy=(xLeftOffset + (plotWidth - im_drawer.textsize( + "Centimorgans", font=centimorganLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), + font=centimorganLabelFont, fill=BLACK) + + im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset + plotWidth, yZero)), + fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself + + def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): + im_drawer = ImageDraw.Draw(canvas) + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + + INTERCROSS = (self.genotype.type == "intercross") + + # draw the LRS scale + # We first determine whether or not we are using a sliding scale. + # If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. + # LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. + # if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. + + # ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD + if self.lrsMax <= 0: # sliding scale + if "lrs_value" in self.qtlresults[0]: + LRS_LOD_Max = max([result['lrs_value'] + for result in self.qtlresults]) + if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": + LRS_LOD_Max = LRS_LOD_Max / self.LODFACTOR + if self.permChecked and self.nperm > 0 and not self.multipleInterval: + self.significant = min( + self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) + else: + if self.permChecked and self.nperm > 0 and not self.multipleInterval: + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) + else: + pass + else: + LRS_LOD_Max = max([result['lod_score'] + for result in self.qtlresults]) + 1 + if self.LRS_LOD == "LRS": + LRS_LOD_Max = LRS_LOD_Max * self.LODFACTOR + if self.permChecked and self.nperm > 0 and not self.multipleInterval: + self.significant = min( + self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) + else: + if self.permChecked and self.nperm > 0 and not self.multipleInterval: + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) + else: + pass + + if self.permChecked and self.nperm > 0 and not self.multipleInterval: + LRS_LOD_Max = max(self.significant, LRS_LOD_Max) + + # genotype trait will give infinite LRS + LRS_LOD_Max = min(LRS_LOD_Max, webqtlConfig.MAXLRS) + else: + LRS_LOD_Max = self.lrsMax + + # ZS: Needed to pass to genome browser + js_data = json.loads(self.js_data) + if self.LRS_LOD == "LRS": + js_data['max_score'] = LRS_LOD_Max / 4.61 + else: + js_data['max_score'] = LRS_LOD_Max + self.js_data = json.dumps(js_data) + + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + + yZero = yTopOffset + plotHeight + LRSHeightThresh = drawAreaHeight + AdditiveHeightThresh = drawAreaHeight / 2 + DominanceHeightThresh = drawAreaHeight / 2 + + if LRS_LOD_Max > 100: + LRSScale = 20.0 + elif LRS_LOD_Max > 20: + LRSScale = 5.0 + elif LRS_LOD_Max > 7.5: + LRSScale = 2.5 + else: + LRSScale = 1.0 + + LRSAxisList = Plot.frange(LRSScale, LRS_LOD_Max, LRSScale) + + # ZS: Convert to int if all axis values are whole numbers + all_int = True + for item in LRSAxisList: + if isinstance(item, int): + continue + else: + all_int = False + break + # TODO(PIL): Convert from PIL + # if all_int: + # max_lrs_width = canvas.stringWidth("%d" % LRS_LOD_Max, font=LRSScaleFont) + 40 + # else: + # max_lrs_width = canvas.stringWidth("%2.1f" % LRS_LOD_Max, font=LRSScaleFont) + 30 + + # draw the "LRS" or "LOD" string to the left of the axis + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + yZero = yTopOffset + plotHeight + + # TEXT_X_DISPLACEMENT = -20 + #TEXT_Y_DISPLACEMENT = -215 + if all_int: + TEXT_X_DISPLACEMENT = -12 + else: + TEXT_X_DISPLACEMENT = -30 + if self.LRS_LOD == "-logP": + TEXT_Y_DISPLACEMENT = -242 + else: + TEXT_Y_DISPLACEMENT = -210 + draw_rotated_text( + canvas, text=self.LRS_LOD, font=LRSLODFont, + xy=(xLeftOffset - im_drawer.textsize( + "999.99", font=LRSScaleFont)[0] - 15 * (zoom - 1) + TEXT_X_DISPLACEMENT, + yZero + TEXT_Y_DISPLACEMENT - 300 * (zoom - 1)), + fill=BLACK, angle=90) + + for item in LRSAxisList: + if LRS_LOD_Max == 0.0: + LRS_LOD_Max = 0.000001 + yTopOffset + 30 * (zoom - 1) + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh + im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset - 4, yLRS)), + fill=self.LRS_COLOR, width=1 * zoom) + if all_int: + scaleStr = "%d" % item + else: + scaleStr = "%2.1f" % item + # Draw the LRS/LOD Y axis label + TEXT_Y_DISPLACEMENT = -10 + im_drawer.text( + text=scaleStr, + xy=(xLeftOffset - 4 - im_drawer.textsize(scaleStr, font=LRSScaleFont)[0] - 5, + yLRS + TEXT_Y_DISPLACEMENT), + font=LRSScaleFont, fill=self.LRS_COLOR) + + if self.permChecked and self.nperm > 0 and not self.multipleInterval: + significantY = yZero - self.significant * LRSHeightThresh / LRS_LOD_Max + suggestiveY = yZero - self.suggestive * LRSHeightThresh / LRS_LOD_Max + # significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] + # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] + startPosX = xLeftOffset + + # "Significant" and "Suggestive" Drawing Routine + # ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...) + + # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function + def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): + rightEdge = xLeftOffset + plotWidth + im_drawer.line( + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY), + (rightEdge, suggestiveY)), + fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH * zoom + # ,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + ) + im_drawer.line( + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, significantY), + (rightEdge, significantY)), + fill=self.SIGNIFICANT_COLOR, + width=self.SIGNIFICANT_WIDTH * zoom + # , clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + ) + sugg_coords = "%d, %d, %d, %d" % ( + start_pos_x, suggestiveY - 2, rightEdge + 2 * zoom, suggestiveY + 2) + sig_coords = "%d, %d, %d, %d" % ( + start_pos_x, significantY - 2, rightEdge + 2 * zoom, significantY + 2) + + if self.LRS_LOD == 'LRS': + sugg_title = "Suggestive LRS = %0.2f" % self.suggestive + sig_title = "Significant LRS = %0.2f" % self.significant + else: + sugg_title = "Suggestive LOD = %0.2f" % ( + self.suggestive / 4.61) + sig_title = "Significant LOD = %0.2f" % ( + self.significant / 4.61) + Areas1 = HtmlGenWrapper.create_area_tag( + shape='rect', + coords=sugg_coords, + title=sugg_title) + Areas2 = HtmlGenWrapper.create_area_tag( + shape='rect', + coords=sig_coords, + title=sig_title) + gifmap.append(Areas1) + gifmap.append(Areas2) + + start_pos_x += (chr_length_dist + \ + self.GraphInterval) * plotXScale + return start_pos_x + + for i, _chr in enumerate(self.genotype): + if self.selectedChr != -1: + if _chr.name == self.ChrList[self.selectedChr][0]: + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[0]) + break + else: + continue + else: + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[i]) + + if self.multipleInterval: + lrsEdgeWidth = 1 + else: + if self.additiveChecked: + additiveMax = max([abs(X['additive']) + for X in self.qtlresults]) + lrsEdgeWidth = 3 + + if zoom == 2: + lrsEdgeWidth = 2 * lrsEdgeWidth + + LRSCoordXY = [] + AdditiveCoordXY = [] + DominanceCoordXY = [] + + symbolFont = ImageFont.truetype( + font=FNT_BS_FILE, size=5) # ZS: For Manhattan Plot + + previous_chr = 1 + previous_chr_as_int = 0 + lineWidth = 1 + oldStartPosX = 0 + startPosX = xLeftOffset + for i, qtlresult in enumerate(self.qtlresults): + m = 0 + thisLRSColor = self.colorCollection[0] + if qtlresult['chr'] != previous_chr and self.selectedChr == -1: + if self.manhattan_plot != True and len(LRSCoordXY) > 1: + draw_open_polygon(canvas, xy=LRSCoordXY, + outline=thisLRSColor, width=lrsEdgeWidth) + + if not self.multipleInterval and self.additiveChecked: + plusColor = self.ADDITIVE_COLOR_POSITIVE + minusColor = self.ADDITIVE_COLOR_NEGATIVE + for k, aPoint in enumerate(AdditiveCoordXY): + if k > 0: + Xc0, Yc0 = AdditiveCoordXY[k - 1] + Xc, Yc = aPoint + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 + Xcm = Xc + else: + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 + if Yc0 < yZero: + im_drawer.line( + xy=((Xc0, Yc0), (Xcm, yZero)), + fill=plusColor, width=lineWidth + ) + im_drawer.line( + xy=((Xcm, yZero), + (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xcm, yZero)), + fill=minusColor, width=lineWidth + ) + im_drawer.line( + xy=((Xcm, yZero), (Xc, Yc)), + fill=plusColor, width=lineWidth + ) + elif (Yc0 - yZero) * (Yc - yZero) > 0: + if Yc < yZero: + im_drawer.line( + xy=((Xc0, Yc0), (Xc, Yc)), + fill=plusColor, + width=lineWidth + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + ) + else: + minYc = min(Yc - yZero, Yc0 - yZero) + if minYc < 0: + im_drawer.line( + xy=((Xc0, Yc0), (Xc, Yc)), + fill=plusColor, width=lineWidth + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + ) + + LRSCoordXY = [] + AdditiveCoordXY = [] + previous_chr = qtlresult['chr'] + previous_chr_as_int += 1 + newStartPosX = ( + self.ChrLengthDistList[previous_chr_as_int - 1] + self.GraphInterval) * plotXScale + if newStartPosX != oldStartPosX: + startPosX += newStartPosX + oldStartPosX = newStartPosX + + # This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used + this_chr = str(self.ChrList[self.selectedChr][0]) + if self.plotScale != "physic": + this_chr = str(self.ChrList[self.selectedChr][1] + 1) + + if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr: + if self.plotScale != "physic" and self.mapping_method == "reaper" and not self.manhattan_plot: + start_cm = self.genotype[self.selectedChr - 1][0].cM + Xc = startPosX + (qtlresult['cM'] - start_cm) * plotXScale + if hasattr(self.genotype, "filler"): + if self.genotype.filler: + if self.selectedChr != -1: + Xc = startPosX + \ + (qtlresult['Mb'] - start_cm) * plotXScale + else: + Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) + else: + if self.selectedChr != -1 and qtlresult['Mb'] > endMb: + Xc = startPosX + endMb * plotXScale + else: + if qtlresult['Mb'] - startMb < 0: + continue + Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale + + # updated by NL 06-18-2011: + # fix the over limit LRS graph issue since genotype trait may give infinite LRS; + # for any lrs is over than 460(LRS max in this system), it will be reset to 460 + + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh + + if 'lrs_value' in qtlresult: + if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': + Yc = yZero - webqtlConfig.MAXLRS * \ + LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) + else: + Yc = yZero - \ + qtlresult['lrs_value'] * LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) + else: + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max + else: + Yc = yZero - \ + qtlresult['lrs_value'] * \ + LRSHeightThresh / LRS_LOD_Max + else: + if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf': + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max + else: + if self.LRS_LOD == "LRS": + Yc = yZero - \ + qtlresult['lod_score'] * self.LODFACTOR * \ + LRSHeightThresh / LRS_LOD_Max + else: + Yc = yZero - \ + qtlresult['lod_score'] * \ + LRSHeightThresh / LRS_LOD_Max + + if self.manhattan_plot == True: + if self.color_scheme == "single": + point_color = self.manhattan_single_color + elif self.color_scheme == "varied": + point_color = DISTINCT_COLOR_LIST[previous_chr_as_int] + else: + if self.selectedChr == -1 and (previous_chr_as_int % 2 == 1): + point_color = RED + else: + point_color = BLUE + + im_drawer.text( + text="5", + xy=( + Xc - im_drawer.textsize("5", + font=symbolFont)[0] / 2 + 1, + Yc - 4), + fill=point_color, font=symbolFont) + else: + LRSCoordXY.append((Xc, Yc)) + + if not self.multipleInterval and self.additiveChecked: + if additiveMax == 0.0: + additiveMax = 0.000001 + Yc = yZero - qtlresult['additive'] * \ + AdditiveHeightThresh / additiveMax + AdditiveCoordXY.append((Xc, Yc)) + + if self.selectedChr != -1 and qtlresult['Mb'] > endMb and endMb != -1: + break + m += 1 + + if self.manhattan_plot != True and len(LRSCoordXY) > 1: + draw_open_polygon(canvas, xy=LRSCoordXY, outline=thisLRSColor, + width=lrsEdgeWidth) + + if not self.multipleInterval and self.additiveChecked: + plusColor = self.ADDITIVE_COLOR_POSITIVE + minusColor = self.ADDITIVE_COLOR_NEGATIVE + for k, aPoint in enumerate(AdditiveCoordXY): + if k > 0: + Xc0, Yc0 = AdditiveCoordXY[k - 1] + Xc, Yc = aPoint + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 + Xcm = Xc + else: + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 + if Yc0 < yZero: + im_drawer.line( + xy=((Xc0, Yc0), (Xcm, yZero)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + im_drawer.line( + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xcm, yZero)), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + im_drawer.line( + xy=((Xcm, yZero), (Xc, Yc)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + elif (Yc0 - yZero) * (Yc - yZero) > 0: + if Yc < yZero: + im_drawer.line( + xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, + width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + minYc = min(Yc - yZero, Yc0 - yZero) + if minYc < 0: + im_drawer.line( + xy=((Xc0, Yc0), (Xc, Yc)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + + if not self.multipleInterval and INTERCROSS and self.dominanceChecked: + plusColor = self.DOMINANCE_COLOR_POSITIVE + minusColor = self.DOMINANCE_COLOR_NEGATIVE + for k, aPoint in enumerate(DominanceCoordXY): + if k > 0: + Xc0, Yc0 = DominanceCoordXY[k - 1] + Xc, Yc = aPoint + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 + Xcm = Xc + else: + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 + if Yc0 < yZero: + im_drawer.line( + xy=((Xc0, Yc0), (Xcm, yZero)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + im_drawer.line( + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + im_drawer.line( + xy=((Xcm, yZero), (Xc, Yc)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + elif (Yc0 - yZero) * (Yc - yZero) > 0: + if Yc < yZero: + im_drawer.line( + xy=((Xc0, Yc0), (Xc, Yc)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), + fill=minusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + minYc = min(Yc - yZero, Yc0 - yZero) + if minYc < 0: + im_drawer.line( + xy=((Xc0, Yc0), (Xc, Yc)), + fill=plusColor, width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + else: + im_drawer.line( + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, + width=lineWidth + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) + ) + + # draw additive scale + if not self.multipleInterval and self.additiveChecked: + additiveScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=16 * zoom) + additiveScale = Plot.detScaleOld(0, additiveMax) + additiveStep = (additiveScale[1] - \ + additiveScale[0]) / additiveScale[2] + additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) + addPlotScale = AdditiveHeightThresh / additiveMax + TEXT_Y_DISPLACEMENT = -8 + + additiveAxisList.append(additiveScale[1]) + for item in additiveAxisList: + additiveY = yZero - item * addPlotScale + im_drawer.line( + xy=((xLeftOffset + plotWidth, additiveY), + (xLeftOffset + 4 + plotWidth, additiveY)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) + scaleStr = "%2.3f" % item + im_drawer.text( + text=scaleStr, + xy=(xLeftOffset + plotWidth + 6, + additiveY + TEXT_Y_DISPLACEMENT), + font=additiveScaleFont, fill=self.ADDITIVE_COLOR_POSITIVE) + + im_drawer.line( + xy=((xLeftOffset + plotWidth, additiveY), + (xLeftOffset + plotWidth, yZero)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) + + im_drawer.line( + xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30 * (zoom - 1))), + fill=self.LRS_COLOR, width=1 * zoom) # the blue line running up the y axis + + def drawGraphBackground(self, canvas, gifmap, offset=(80, 120, 80, 50), zoom=1, startMb=None, endMb=None): + # conditions + # multiple Chromosome view + # single Chromosome Physical + # single Chromosome Genetic + im_drawer = ImageDraw.Draw(canvas) + xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset + plotWidth = canvas.size[0] - xLeftOffset - xRightOffset + plotHeight = canvas.size[1] - yTopOffset - yBottomOffset + yBottom = yTopOffset + plotHeight + fontZoom = zoom + if zoom == 2: + fontZoom = 1.5 + yTopOffset += 30 + + # calculate plot scale + if self.plotScale != 'physic': + self.ChrLengthDistList = self.ChrLengthCMList + drawRegionDistance = self.ChrLengthCMSum + else: + self.ChrLengthDistList = self.ChrLengthMbList + drawRegionDistance = self.ChrLengthMbSum + + if self.selectedChr > -1: # single chromosome view + spacingAmt = plotWidth / 13.5 + i = 0 + for startPix in Plot.frange(xLeftOffset, xLeftOffset + plotWidth, spacingAmt): + if (i % 2 == 0): + theBackColor = self.GRAPH_BACK_DARK_COLOR + else: + theBackColor = self.GRAPH_BACK_LIGHT_COLOR + i += 1 + im_drawer.rectangle( + [(startPix, yTopOffset), + (min(startPix + spacingAmt, xLeftOffset + plotWidth), yBottom)], + outline=theBackColor, fill=theBackColor) + + drawRegionDistance = self.ChrLengthDistList[self.ChrList[self.selectedChr][1]] + self.ChrLengthDistList = [drawRegionDistance] + if self.plotScale == 'physic': + plotXScale = plotWidth / (endMb - startMb) + else: + plotXScale = plotWidth / drawRegionDistance + + else: # multiple chromosome view + plotXScale = plotWidth / \ + ((len(self.genotype) - 1) * self.GraphInterval + drawRegionDistance) + + startPosX = xLeftOffset + if fontZoom == 1.5: + chrFontZoom = 2 + else: + chrFontZoom = 1 + chrLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=24 * chrFontZoom) + + for i, _chr in enumerate(self.genotype): + if (i % 2 == 0): + theBackColor = self.GRAPH_BACK_DARK_COLOR + else: + theBackColor = self.GRAPH_BACK_LIGHT_COLOR + + # draw the shaded boxes and the sig/sug thick lines + im_drawer.rectangle( + ((startPosX, yTopOffset), + (startPosX + self.ChrLengthDistList[i] * plotXScale, yBottom)), + outline=GAINSBORO, + fill=theBackColor) + + chrNameWidth, chrNameHeight = im_drawer.textsize( + _chr.name, font=chrLabelFont) + chrStartPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale - chrNameWidth) / 2 + chrEndPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale + chrNameWidth) / 2 + + TEXT_Y_DISPLACEMENT = 0 + im_drawer.text(xy=(chrStartPix, yTopOffset + TEXT_Y_DISPLACEMENT), + text=_chr.name, font=chrLabelFont, fill=BLACK) + COORDS = "%d,%d,%d,%d" % ( + chrStartPix, yTopOffset, chrEndPix, yTopOffset + 20) + + # add by NL 09-03-2010 + HREF = "javascript:chrView(%d,%s);" % (i, self.ChrLengthMbList) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', + coords=COORDS, + href=HREF) + gifmap.append(Areas) + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale + + return plotXScale + + def drawPermutationHistogram(self): + ######################################### + # Permutation Graph + ######################################### + myCanvas = Image.new("RGBA", size=(500, 300)) + if 'lod_score' in self.qtlresults[0] and self.LRS_LOD == "LRS": + perm_output = [value * 4.61 for value in self.perm_output] + elif 'lod_score' not in self.qtlresults[0] and self.LRS_LOD == "LOD": + perm_output = [value / 4.61 for value in self.perm_output] + else: + perm_output = self.perm_output + + filename = webqtlUtil.genRandStr("Reg_") + Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, + YLabel='Frequency', title=' Histogram of Permutation Test') + myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR + filename), + format='gif') + + return filename + + def geneTable(self, geneCol, refGene=None): + if self.dataset.group.species == 'mouse' or self.dataset.group.species == 'rat': + self.gene_table_header = self.getGeneTableHeaderList(refGene=None) + self.gene_table_body = self.getGeneTableBody(geneCol, refGene=None) + else: + self.gene_table_header = None + self.gene_table_body = None + + def getGeneTableHeaderList(self, refGene=None): + + gene_table_header_list = [] + if self.dataset.group.species == "mouse": + if refGene: + gene_table_header_list = ["Index", + "Symbol", + "Mb Start", + "Length (Kb)", + "SNP Count", + "SNP Density", + "Avg Expr", + "Human Chr", + "Mb Start (hg19)", + "Literature Correlation", + "Gene Description"] + else: + gene_table_header_list = ["", + "Index", + "Symbol", + "Mb Start", + "Length (Kb)", + "SNP Count", + "SNP Density", + "Avg Expr", + "Human Chr", + "Mb Start (hg19)", + "Gene Description"] + elif self.dataset.group.species == "rat": + gene_table_header_list = ["", + "Index", + "Symbol", + "Mb Start", + "Length (Kb)", + "Avg Expr", + "Mouse Chr", + "Mb Start (mm9)", + "Human Chr", + "Mb Start (hg19)", + "Gene Description"] + + return gene_table_header_list + + def getGeneTableBody(self, geneCol, refGene=None): + gene_table_body = [] + + tableIterationsCnt = 0 + if self.dataset.group.species == "mouse": + for gIndex, theGO in enumerate(geneCol): + + tableIterationsCnt = tableIterationsCnt + 1 + + this_row = [] # container for the cells of each row + selectCheck = HtmlGenWrapper.create_input_tag( + type_="checkbox", + name="selectCheck", + value=theGO["GeneSymbol"], + Class="checkbox trait_checkbox") # checkbox for each row + + geneLength = (theGO["TxEnd"] - theGO["TxStart"]) * 1000.0 + tenPercentLength = geneLength * 0.0001 + txStart = theGO["TxStart"] + txEnd = theGO["TxEnd"] + theGO["snpDensity"] = theGO["snpCount"] / geneLength + if self.ALEX_DEBUG_BOOL_PRINT_GENE_LIST: + geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO[ + "GeneID"] + + if theGO["snpCount"]: + snpString = HT.Link( + (f"http://genenetwork.org/webqtl/main.py?FormID=snpBrowser&" + f"chr={theGO['Chr']}&" + f"start={theGO['TxStart']}&" + f"end={theGO['TxEnd']}&" + f"geneName={theGO['GeneSymbol']}&" + f"s1={self.diffCol[0]}&s2=%d"), + str(theGO["snpCount"]) # The text to display + ) + snpString.set_blank_target() + snpString.set_attribute("class", "normalsize") + else: + snpString = 0 + + mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm10&position=chr" + \ + theGO["Chr"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( + int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" + + # the chromosomes for human 1 are 1qXX.XX + if 'humanGene' in theGO: + if theGO['humanGene']["TxStart"] == '': + humanStartDisplay = "" + else: + humanStartDisplay = "%0.6f" % theGO['humanGene']["TxStart"] + + humanChr = theGO['humanGene']["Chr"] + humanTxStart = theGO['humanGene']["TxStart"] + + humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % ( + humanChr, int(1000000 * theGO['humanGene']["TxStart"]), int(1000000 * theGO['humanGene']["TxEnd"])) + else: + humanStartString = humanChr = humanStartDisplay = "--" + + geneDescription = theGO["GeneDescription"] + if len(geneDescription) > 70: + geneDescription = geneDescription[:70] + "..." + + if theGO["snpDensity"] < 0.000001: + snpDensityStr = "0" + else: + snpDensityStr = "%0.6f" % theGO["snpDensity"] + + avgExpr = [] # theGO["avgExprVal"] + if avgExpr in ([], None): + avgExpr = "--" + else: + avgExpr = "%0.6f" % avgExpr + + # If we have a referenceGene then we will show the Literature Correlation + if theGO["Chr"] == "X": + chr_as_int = 19 + else: + chr_as_int = int(theGO["Chr"]) - 1 + if refGene: + literatureCorrelationString = str(self.getLiteratureCorrelation( + self.cursor, refGene, theGO['GeneID']) or "N/A") + + this_row = [selectCheck.__str__(), + str(tableIterationsCnt), + str(HtmlGenWrapper.create_link_tag( + geneIdString, + theGO["GeneSymbol"], + target="_blank") + ), + str(HtmlGenWrapper.create_link_tag( + mouseStartString, + "{:.6f}".format(txStart), + target="_blank") + ), + str(HtmlGenWrapper.create_link_tag( + "javascript:rangeView('{}', {:f}, {:f})".format( + str(chr_as_int), + txStart - tenPercentLength, + txEnd + tenPercentLength), + "{:.3f}".format(geneLength))), + snpString, + snpDensityStr, + avgExpr, + humanChr, + str(HtmlGenWrapper.create_link_tag( + humanStartString, + humanStartDisplay, + target="_blank")), + literatureCorrelationString, + geneDescription] + else: + this_row = [selectCheck.__str__(), + str(tableIterationsCnt), + str(HtmlGenWrapper.create_link_tag( + geneIdString, theGO["GeneSymbol"], + target="_blank")), + str(HtmlGenWrapper.create_link_tag( + mouseStartString, + "{:.6f}".format(txStart), + target="_blank")), + str(HtmlGenWrapper.create_link_tag( + "javascript:rangeView('{}', {:f}, {:f})".format( + str(chr_as_int), + txStart - tenPercentLength, + txEnd + tenPercentLength), + "{:.3f}".format(geneLength))), + snpString, + snpDensityStr, + avgExpr, + humanChr, + str(HtmlGenWrapper.create_link_tag( + humanStartString, + humanStartDisplay, + target="_blank")), + geneDescription] + + gene_table_body.append(this_row) + + elif self.dataset.group.species == 'rat': + for gIndex, theGO in enumerate(geneCol): + this_row = [] # container for the cells of each row + selectCheck = str(HtmlGenWrapper.create_input_tag( + type_="checkbox", + name="selectCheck", + Class="checkbox trait_checkbox")) # checkbox for each row + + if theGO["GeneID"] != "": + geneSymbolNCBI = str(HtmlGenWrapper.create_link_tag( + "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format( + theGO["GeneID"]), + theGO["GeneSymbol"], + Class="normalsize", + target="_blank")) + else: + geneSymbolNCBI = theGO["GeneSymbol"] + + if theGO["Chr"] == "X": + chr_as_int = 20 + else: + chr_as_int = int(theGO["Chr"]) - 1 + + geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) + geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chr"], float( + theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) + + avgExprVal = [] + if avgExprVal != "" and avgExprVal: + avgExprVal = "%0.5f" % float(avgExprVal) + else: + avgExprVal = "" + + # Mouse Gene + if theGO['mouseGene']: + mouseChr = theGO['mouseGene']["Chr"] + mouseTxStart = "%0.6f" % theGO['mouseGene']["TxStart"] + else: + mouseChr = mouseTxStart = "" + + # the chromosomes for human 1 are 1qXX.XX + if 'humanGene' in theGO: + humanChr = theGO['humanGene']["Chr"] + humanTxStart = "%0.6f" % theGO['humanGene']["TxStart"] + else: + humanChr = humanTxStart = "" + + geneDesc = theGO["GeneDescription"] + if geneDesc == "---": + geneDesc = "" + + this_row = [selectCheck.__str__(), + str(gIndex + 1), + geneSymbolNCBI, + "%0.6f" % theGO["TxStart"], + str(HtmlGenWrapper.create_link_tag( + geneLengthURL, + "{:.3f}".format(geneLength * 1000.0))), + avgExprVal, + mouseChr, + mouseTxStart, + humanChr, + humanTxStart, + geneDesc] + + gene_table_body.append(this_row) + + return gene_table_body + + def getLiteratureCorrelation(cursor, geneId1=None, geneId2=None): + if not geneId1 or not geneId2: + return None + if geneId1 == geneId2: + return 1.0 + geneId1 = str(geneId1) + geneId2 = str(geneId2) + lCorr = None + try: + query = 'SELECT Value FROM LCorrRamin3 WHERE GeneId1 = %s and GeneId2 = %s' + for x, y in [(geneId1, geneId2), (geneId2, geneId1)]: + cursor.execute(query, (x, y)) + lCorr = cursor.fetchone() + if lCorr: + lCorr = lCorr[0] + break + except: + raise # lCorr = None + return lCorr diff --git a/gn2/wqflask/marker_regression/exceptions.py b/gn2/wqflask/marker_regression/exceptions.py new file mode 100644 index 00000000..8c7e822b --- /dev/null +++ b/gn2/wqflask/marker_regression/exceptions.py @@ -0,0 +1,13 @@ +"""Mapping Exception classes.""" + +class NoMappingResultsError(Exception): + "Exception to raise if no results are computed." + + def __init__(self, trait, dataset, mapping_method): + self.trait = trait + self.dataset = dataset + self.mapping_method = mapping_method + self.message = ( + f"The mapping of trait '{trait}' from dataset '{dataset}' using " + f"the '{mapping_method}' mapping method returned no results.") + super().__init__(self.message, trait, mapping_method) diff --git a/gn2/wqflask/marker_regression/gemma_mapping.py b/gn2/wqflask/marker_regression/gemma_mapping.py new file mode 100644 index 00000000..d8851486 --- /dev/null +++ b/gn2/wqflask/marker_regression/gemma_mapping.py @@ -0,0 +1,245 @@ +import os +import math +import string +import random +import json + +from gn2.base import webqtlConfig +from gn2.base.trait import create_trait +from gn2.base.data_set import create_dataset +from gn2.utility.redis_tools import get_redis_conn +from gn2.utility.tools import flat_files, assert_file +from gn2.utility.tools import GEMMA_WRAPPER_COMMAND +from gn2.utility.tools import TEMPDIR +from gn2.utility.tools import WEBSERVER_MODE +from gn2.utility.tools import get_setting +from gn2.wqflask.database import database_connection +from gn3.computations.gemma import generate_hash_of_string + + +GEMMAOPTS = "-debug" +if WEBSERVER_MODE == 'PROD': + GEMMAOPTS = "-no-check" + + +def generate_random_n_string(n): + return ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(n)) + + +def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, + maf=0.01, first_run=True, output_files=None): + """Generates p-values for each marker using GEMMA""" + + if this_dataset.group.genofile is not None: + genofile_name = this_dataset.group.genofile[:-5] + else: + genofile_name = this_dataset.group.name + + if first_run: + pheno_filename = gen_pheno_txt_file(this_dataset, genofile_name, vals) + + if not os.path.isfile(f"{webqtlConfig.GENERATED_IMAGE_DIR}" + f"{genofile_name}_output.assoc.txt"): + open((f"{webqtlConfig.GENERATED_IMAGE_DIR}" + f"{genofile_name}_output.assoc.txt"), + "w+") + + k_output_filename = (f"{this_dataset.group.name}_K_" + f"{generate_random_n_string(6)}") + gwa_output_filename = (f"{this_dataset.group.name}_GWA_" + f"{generate_random_n_string(6)}") + + + this_chromosomes_name = [] + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as db_cursor: + for this_chr in this_dataset.species.chromosomes.chromosomes(db_cursor): + this_chromosomes_name.append(this_dataset.species.chromosomes.chromosomes(db_cursor)[this_chr].name) + + chr_list_string = ",".join(this_chromosomes_name) + if covariates != "": + covar_filename = gen_covariates_file(this_dataset, covariates, samples) + if str(use_loco).lower() == "true": + bimbam_dir = flat_files('genotype/bimbam') + geno_filepath = assert_file( + f"{bimbam_dir}/{genofile_name}_geno.txt") + pheno_filepath = f"{TEMPDIR}/gn2/{pheno_filename}.txt" + snps_filepath = assert_file( + f"{bimbam_dir}/{genofile_name}_snps.txt") + k_json_output_filepath = f"{TEMPDIR}/gn2/{k_output_filename}.json" + generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco " + f"{chr_list_string} -- {GEMMAOPTS} " + f"-g {geno_filepath} -p " + f"{pheno_filepath} -a " + f"{snps_filepath} -gk > " + f"{k_json_output_filepath}") + os.system(generate_k_command) + + gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco " + f"--input {k_json_output_filepath} " + f"-- {GEMMAOPTS} " + f"-g {geno_filepath} " + f"-p {pheno_filepath} ") + if covariates != "": + gemma_command += (f"-c {flat_files('mapping')}/" + f"{covar_filename}.txt " + f"-a {flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt " + f"-lmm 9 -maf {maf} > {TEMPDIR}/gn2/" + f"{gwa_output_filename}.json") + else: + gemma_command += (f"-a {flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt -lmm 9 -maf " + f"{maf} > " + f"{TEMPDIR}/gn2/{gwa_output_filename}.json") + + else: + generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json -- " + f"{GEMMAOPTS} " + f" -g {flat_files('genotype/bimbam')}/" + f"{genofile_name}_geno.txt -p " + f"{TEMPDIR}/gn2/{pheno_filename}.txt -a " + f"{flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt -gk > " + f"{TEMPDIR}/gn2/{k_output_filename}.json") + + os.system(generate_k_command) + + gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --input " + f"{TEMPDIR}/gn2/{k_output_filename}.json -- " + f"{GEMMAOPTS} " + f"-a {flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt " + f"-lmm 9 -g {flat_files('genotype/bimbam')}/" + f"{genofile_name}_geno.txt -p " + f"{TEMPDIR}/gn2/{pheno_filename}.txt ") + + if covariates != "": + gemma_command += (f" -c {flat_files('mapping')}/" + f"{covar_filename}.txt > " + f"{TEMPDIR}/gn2/{gwa_output_filename}.json") + else: + gemma_command += f" > {TEMPDIR}/gn2/{gwa_output_filename}.json" + + os.system(gemma_command) + else: + gwa_output_filename = output_files + + if use_loco == "True": + marker_obs = parse_loco_output(this_dataset, gwa_output_filename) + return marker_obs, gwa_output_filename + else: + marker_obs = parse_loco_output( + this_dataset, gwa_output_filename, use_loco) + return marker_obs, gwa_output_filename + + +def gen_pheno_txt_file(this_dataset, genofile_name, vals): + """Generates phenotype file for GEMMA""" + + filename = "PHENO_" + generate_hash_of_string(this_dataset.name + str(vals)).replace("/", "_") + + with open(f"{TEMPDIR}/gn2/{filename}.txt", "w") as outfile: + for value in vals: + if value == "x": + outfile.write("NA\n") + else: + outfile.write(value + "\n") + + return filename + + +def gen_covariates_file(this_dataset, covariates, samples): + covariate_list = covariates.split(",") + covariate_data_object = [] + for covariate in covariate_list: + this_covariate_data = [] + trait_name = covariate.split(":")[0] + dataset_name = covariate.split(":")[1] + if dataset_name == "Temp": + temp_group = trait_name.split("_")[2] + dataset_ob = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=temp_group) + else: + dataset_ob = create_dataset(covariate.split(":")[1]) + trait_ob = create_trait(dataset=dataset_ob, + name=trait_name, + cellid=None) + this_dataset.group.get_samplelist(redis_conn=get_redis_conn()) + trait_samples = this_dataset.group.samplelist + trait_sample_data = trait_ob.data + for index, sample in enumerate(trait_samples): + if sample in samples: + if sample in trait_sample_data: + sample_value = trait_sample_data[sample].value + this_covariate_data.append(sample_value) + else: + this_covariate_data.append("-9") + covariate_data_object.append(this_covariate_data) + + filename = "COVAR_" + generate_hash_of_string(this_dataset.name + str(covariate_data_object)).replace("/", "_") + + with open((f"{flat_files('mapping')}/" + f"{filename}.txt"), + "w") as outfile: + for i in range(len(covariate_data_object[0])): + for this_covariate in covariate_data_object: + outfile.write(str(this_covariate[i]) + "\t") + outfile.write("\n") + + return filename + + +def parse_loco_output(this_dataset, gwa_output_filename, loco="True"): + + output_filename = f"{TEMPDIR}/gn2/{gwa_output_filename}.json" + if os.stat(output_filename).st_size == 0: + return {} + + output_filelist = [] + with open(output_filename) as data_file: + data = json.load(data_file) + + files = data['files'] + for file in files: + output_filelist.append(file[2]) + + included_markers = [] + p_values = [] + marker_obs = [] + previous_chr = 0 + + for this_file in output_filelist: + if not os.path.isfile(this_file): + break + with open(this_file) as output_file: + for line in output_file: + if line.startswith("chr\t"): + continue + else: + marker = {} + marker['name'] = line.split("\t")[1] + if line.split("\t")[0] != "X" and line.split("\t")[0] != "X/Y" and line.split("\t")[0] != "Y" and line.split("\t")[0] != "M": + if "chr" in line.split("\t")[0]: + marker['chr'] = int(line.split("\t")[0][3:]) + else: + marker['chr'] = int(line.split("\t")[0]) + if marker['chr'] > int(previous_chr): + previous_chr = marker['chr'] + elif marker['chr'] < int(previous_chr): + break + else: + marker['chr'] = line.split("\t")[0] + marker['Mb'] = float(line.split("\t")[2]) / 1000000 + marker['p_value'] = float(line.split("\t")[10]) + marker['additive'] = -(float(line.split("\t")[7])/2) + if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): + marker['lod_score'] = marker['p_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + marker_obs.append(marker) + + included_markers.append(line.split("\t")[1]) + p_values.append(float(line.split("\t")[9])) + + return marker_obs diff --git a/gn2/wqflask/marker_regression/plink_mapping.py b/gn2/wqflask/marker_regression/plink_mapping.py new file mode 100644 index 00000000..7427717a --- /dev/null +++ b/gn2/wqflask/marker_regression/plink_mapping.py @@ -0,0 +1,167 @@ +import string +import os + +from gn2.base.webqtlConfig import TMPDIR +from gn2.utility import webqtlUtil +from gn2.utility.tools import flat_files, PLINK_COMMAND + + +def run_plink(this_trait, dataset, species, vals, maf): + plink_output_filename = webqtlUtil.genRandStr( + f"{dataset.group.name}_{this_trait.name}_") + gen_pheno_txt_file(dataset, vals) + + plink_command = f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc " + + os.system(plink_command) + + count, p_values = parse_plink_output(plink_output_filename, species) + + dataset.group.markers.add_pvalues(p_values) + + return dataset.group.markers.markers + + +def gen_pheno_txt_file(this_dataset, vals): + """Generates phenotype file for GEMMA/PLINK""" + + current_file_data = [] + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam", "r") as outfile: + for i, line in enumerate(outfile): + split_line = line.split() + current_file_data.append(split_line) + + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam", "w") as outfile: + for i, line in enumerate(current_file_data): + if vals[i] == "x": + this_val = -9 + else: + this_val = vals[i] + outfile.write("0 " + line[1] + " " + line[2] + " " + \ + line[3] + " " + line[4] + " " + str(this_val) + "\n") + + +def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename=''): + ped_sample_list = get_samples_from_ped_file(dataset) + output_file = open(f"{TMPDIR}{pheno_filename}.txt", "wb") + header = f"FID\tIID\t{this_trait.name}\n" + output_file.write(header) + + new_value_list = [] + + # if valueDict does not include some strain, value will be set to -9999 as missing value + for i, sample in enumerate(ped_sample_list): + try: + value = vals[i] + value = str(value).replace('value=', '') + value = value.strip() + except: + value = -9999 + + new_value_list.append(value) + + new_line = '' + for i, sample in enumerate(ped_sample_list): + j = i + 1 + value = new_value_list[i] + new_line += f"{sample}\t{sample}\t{value}\n" + + if j % 1000 == 0: + output_file.write(newLine) + new_line = '' + + if new_line: + output_file.write(new_line) + + output_file.close() + +# get strain name from ped file in order + + +def get_samples_from_ped_file(dataset): + ped_file = open(f"{flat_files('mapping')}{dataset.group.name}.ped", "r") + line = ped_file.readline() + sample_list = [] + + while line: + lineList = line.strip().split('\t') + lineList = [item.strip() for item in lineList] + + sample_name = lineList[0] + sample_list.append(sample_name) + + line = ped_file.readline() + + return sample_list + + +def parse_plink_output(output_filename, species): + plink_results = {} + + threshold_p_value = 1 + + result_fp = open(f"{TMPDIR}{output_filename}.qassoc", "rb") + + line = result_fp.readline() + + value_list = [] # initialize value list, this list will include snp, bp and pvalue info + p_value_dict = {} + count = 0 + + while line: + # convert line from str to list + line_list = build_line_list(line=line) + + # only keep the records whose chromosome name is in db + if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip() != 'NA': + + chr_name = species.chromosomes.chromosomes[int(line_list[0])] + snp = line_list[1] + BP = line_list[2] + p_value = float(line_list[-1]) + if threshold_p_value >= 0 and threshold_p_value <= 1: + if p_value < threshold_p_value: + p_value_dict[snp] = float(p_value) + + if chr_name in plink_results: + value_list = plink_results[chr_name] + + # pvalue range is [0,1] + if threshold_p_value >= 0 and threshold_p_value <= 1: + if p_value < threshold_p_value: + value_list.append((snp, BP, p_value)) + count += 1 + + plink_results[chr_name] = value_list + value_list = [] + else: + if threshold_p_value >= 0 and threshold_p_value <= 1: + if p_value < threshold_p_value: + value_list.append((snp, BP, p_value)) + count += 1 + + if value_list: + plink_results[chr_name] = value_list + + value_list = [] + + line = result_fp.readline() + else: + line = result_fp.readline() + + return count, p_value_dict + +###################################################### +# input: line: str,one line read from file +# function: convert line from str to list; +# output: lineList list +####################################################### + + +def build_line_list(line=""): + # irregular number of whitespaces between columns + line_list = line.strip().split(' ') + line_list = [item for item in line_list if item != ''] + line_list = [item.strip() for item in line_list] + + return line_list diff --git a/gn2/wqflask/marker_regression/qtlreaper_mapping.py b/gn2/wqflask/marker_regression/qtlreaper_mapping.py new file mode 100644 index 00000000..2c9ca1b2 --- /dev/null +++ b/gn2/wqflask/marker_regression/qtlreaper_mapping.py @@ -0,0 +1,193 @@ +import os +import math +import string +import random +import json +import re + +from gn2.base import webqtlConfig +from gn2.base.trait import GeneralTrait +from gn2.base.data_set import create_dataset +from gn2.utility.tools import flat_files, REAPER_COMMAND, TEMPDIR + + +def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None): + """Generates p-values for each marker using qtlreaper""" + + if first_run: + if this_dataset.group.genofile != None: + genofile_name = this_dataset.group.genofile[:-5] + else: + genofile_name = this_dataset.group.name + + trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" + gen_pheno_txt_file(samples, vals, trait_filename) + + output_filename = (f"{this_dataset.group.name}_GWA_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) + bootstrap_filename = None + permu_filename = None + + opt_list = [] + if boot_check and num_bootstrap > 0: + bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) + + opt_list.append("-b") + opt_list.append(f"--n_bootstrap {str(num_bootstrap)}") + opt_list.append( + f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") + if num_perm > 0: + permu_filename = ("{this_dataset.group.name}_PERM_" + + ''.join(random.choice(string.ascii_uppercase + + string.digits) for _ in range(6)) + ) + opt_list.append("-n " + str(num_perm)) + opt_list.append( + "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") + if control_marker != "" and do_control == "true": + opt_list.append("-c " + control_marker) + if manhattan_plot != True: + opt_list.append("--interval 1") + + reaper_command = (REAPER_COMMAND + + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), + + genofile_name, + TEMPDIR, + trait_filename, + " ".join( + opt_list), + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename)) + os.system(reaper_command) + else: + output_filename, permu_filename, bootstrap_filename = output_files + + marker_obs, permu_vals, bootstrap_vals = parse_reaper_output( + output_filename, permu_filename, bootstrap_filename) + + suggestive = 0 + significant = 0 + if len(permu_vals) > 0: + suggestive = permu_vals[int(num_perm * 0.37 - 1)] + significant = permu_vals[int(num_perm * 0.95 - 1)] + + return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, + [output_filename, permu_filename, bootstrap_filename]) + + +def gen_pheno_txt_file(samples, vals, trait_filename): + """Generates phenotype file for GEMMA""" + + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile: + outfile.write("Trait\t") + + filtered_sample_list = [] + filtered_vals_list = [] + for i, sample in enumerate(samples): + if vals[i] != "x": + filtered_sample_list.append(sample) + filtered_vals_list.append(vals[i]) + + samples_string = "\t".join(filtered_sample_list) + outfile.write(samples_string + "\n") + outfile.write("T1\t") + values_string = "\t".join(filtered_vals_list) + outfile.write(values_string) + + +def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): + included_markers = [] + p_values = [] + marker_obs = [] + + only_cm = False + only_mb = False + + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{gwa_filename}.txt") as output_file: + for line in output_file: + if line.startswith("ID\t"): + if len(line.split("\t")) < 8: + if 'cM' in line.split("\t"): + only_cm = True + else: + only_mb = True + continue + else: + marker = {} + marker['name'] = line.split("\t")[1] + try: + marker['chr'] = int(line.split("\t")[2]) + except: + marker['chr'] = line.split("\t")[2] + if only_cm or only_mb: + if only_cm: + marker['cM'] = float(line.split("\t")[3]) + else: + if float(line.split("\t")[3]) > 1000: + marker['Mb'] = float(line.split("\t")[3]) / 1000000 + else: + marker['Mb'] = float(line.split("\t")[3]) + if float(line.split("\t")[6]) != 1: + marker['p_value'] = float(line.split("\t")[6]) + marker['lrs_value'] = float(line.split("\t")[4]) + marker['lod_score'] = marker['lrs_value'] / 4.61 + marker['additive'] = float(line.split("\t")[5]) + else: + marker['cM'] = float(line.split("\t")[3]) + if float(line.split("\t")[4]) > 1000: + marker['Mb'] = float(line.split("\t")[4]) / 1000000 + else: + marker['Mb'] = float(line.split("\t")[4]) + if float(line.split("\t")[7]) != 1: + marker['p_value'] = float(line.split("\t")[7]) + marker['lrs_value'] = float(line.split("\t")[5]) + marker['lod_score'] = marker['lrs_value'] / 4.61 + marker['additive'] = float(line.split("\t")[6]) + marker_obs.append(marker) + + # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc + sorted_indices = natural_sort(marker_obs) + + permu_vals = [] + if permu_filename: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{permu_filename}.txt") as permu_file: + for line in permu_file: + permu_vals.append(float(line)) + + bootstrap_vals = [] + if bootstrap_filename: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") as bootstrap_file: + for line in bootstrap_file: + bootstrap_vals.append(int(line)) + + marker_obs = [marker_obs[i] for i in sorted_indices] + if len(bootstrap_vals) > 0: + bootstrap_vals = [bootstrap_vals[i] for i in sorted_indices] + + return marker_obs, permu_vals, bootstrap_vals + + +def natural_sort(marker_list): + """ + Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort + Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results + """ + + def convert(text): + if text.isdigit(): + return int(text) + else: + if text != "M": + return text.lower() + else: + return "z" + + alphanum_key = lambda key: [convert(c) for c in re.split( + '([0-9]+)', str(marker_list[key]['chr']))] + return sorted(list(range(len(marker_list))), key=alphanum_key) diff --git a/gn2/wqflask/marker_regression/rqtl_mapping.py b/gn2/wqflask/marker_regression/rqtl_mapping.py new file mode 100644 index 00000000..b7739228 --- /dev/null +++ b/gn2/wqflask/marker_regression/rqtl_mapping.py @@ -0,0 +1,167 @@ +import csv +import hashlib +import io +import json +import requests +import shutil +from typing import Dict +from typing import List +from typing import Optional +from typing import TextIO + +import numpy as np + +from gn2.base.webqtlConfig import TMPDIR +from gn2.base.trait import create_trait +from gn2.utility.redis_tools import get_redis_conn +from gn2.utility.tools import locate, get_setting, GN3_LOCAL_URL +from gn2.wqflask.database import database_connection + + +def run_rqtl(trait_name, vals, samples, dataset, pair_scan, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors): + """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)""" + + pheno_file = write_phenotype_file(trait_name, samples, vals, dataset, cofactors, perm_strata_list) + if dataset.group.genofile: + geno_file = locate(dataset.group.genofile, "genotype") + else: + geno_file = locate(dataset.group.name + ".geno", "genotype") + + post_data = { + "pheno_file": pheno_file, + "geno_file": geno_file, + "model": model, + "method": method, + "nperm": num_perm, + "scale": mapping_scale + } + + if pair_scan: + post_data["pairscan"] = True + + if cofactors: + covarstruct_file = write_covarstruct_file(cofactors) + post_data["covarstruct"] = covarstruct_file + + if do_control == "true" and control_marker: + post_data["control"] = control_marker + + if not manhattan_plot and not pair_scan: + post_data["interval"] = True + if cofactors: + post_data["addcovar"] = True + + if perm_strata_list: + post_data["pstrata"] = True + + rqtl_output = requests.post(GN3_LOCAL_URL + "api/rqtl/compute", data=post_data).json() + if num_perm > 0: + return rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'], rqtl_output['results'] + else: + return rqtl_output['results'] + + +def get_hash_of_textio(the_file: TextIO) -> str: + """Given a StringIO, return the hash of its contents""" + + the_file.seek(0) + hash_of_file = hashlib.md5(the_file.read().encode()).hexdigest() + hash_of_file = hash_of_file.replace("/", "_") # Replace / with _ to prevent issue with filenames being translated to directories + + return hash_of_file + + +def write_covarstruct_file(cofactors: str) -> str: + """ + Given list of cofactors (as comma-delimited string), write + a comma-delimited file where the first column consists of cofactor names + and the second column indicates whether they're numerical or categorical + """ + trait_datatype_json = None + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute("SELECT value FROM TraitMetadata WHERE type='trait_data_type'") + trait_datatype_json = json.loads(cursor.fetchone()[0]) + + covar_struct_file = io.StringIO() + writer = csv.writer(covar_struct_file, delimiter="\t", quoting = csv.QUOTE_NONE) + for cofactor in cofactors.split(","): + datatype = trait_datatype_json[cofactor] if cofactor in trait_datatype_json else "numerical" + cofactor_name = cofactor.split(":")[0] + writer.writerow([cofactor_name, datatype]) + + hash_of_file = get_hash_of_textio(covar_struct_file) + file_path = TMPDIR + hash_of_file + ".csv" + + with open(file_path, "w") as fd: + covar_struct_file.seek(0) + shutil.copyfileobj(covar_struct_file, fd) + + return file_path + + +def write_phenotype_file(trait_name: str, + samples: List[str], + vals: List, + dataset_ob, + cofactors: Optional[str] = None, + perm_strata_list: Optional[List] = None) -> TextIO: + """Given trait name, sample list, value list, dataset ob, and optional string + representing cofactors, return the file's full path/name + + """ + cofactor_data = cofactors_to_dict(cofactors, dataset_ob, samples) + + pheno_file = io.StringIO() + writer = csv.writer(pheno_file, delimiter="\t", quoting=csv.QUOTE_NONE) + + header_row = ["Samples", trait_name] + header_row += [cofactor for cofactor in cofactor_data] + if perm_strata_list: + header_row.append("Strata") + + writer.writerow(header_row) + for i, sample in enumerate(samples): + this_row = [sample] + if vals[i] != "x": + this_row.append(str(round(float(vals[i]), 3))) + else: + this_row.append("NA") + for cofactor in cofactor_data: + this_row.append(cofactor_data[cofactor][i]) + if perm_strata_list: + this_row.append(perm_strata_list[i]) + writer.writerow(this_row) + + hash_of_file = get_hash_of_textio(pheno_file) + file_path = TMPDIR + hash_of_file + ".csv" + + with open(file_path, "w") as fd: + pheno_file.seek(0) + shutil.copyfileobj(pheno_file, fd) + + return file_path + + +def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict: + """Given a string of cofactors, the trait being mapped's dataset ob, + and list of samples, return cofactor data as a Dict + + """ + cofactor_dict = {} + if cofactors: + dataset_ob.group.get_samplelist(redis_conn=get_redis_conn()) + sample_list = dataset_ob.group.samplelist + for cofactor in cofactors.split(","): + cofactor_name, cofactor_dataset = cofactor.split(":") + if cofactor_dataset == dataset_ob.name: + cofactor_dict[cofactor_name] = [] + trait_ob = create_trait(dataset=dataset_ob, + name=cofactor_name) + sample_data = trait_ob.data + for index, sample in enumerate(samples): + if sample in sample_data: + sample_value = str(round(float(sample_data[sample].value), 3)) + cofactor_dict[cofactor_name].append(sample_value) + else: + cofactor_dict[cofactor_name].append("NA") + return cofactor_dict diff --git a/gn2/wqflask/marker_regression/run_mapping.py b/gn2/wqflask/marker_regression/run_mapping.py new file mode 100644 index 00000000..7d2d40f7 --- /dev/null +++ b/gn2/wqflask/marker_regression/run_mapping.py @@ -0,0 +1,743 @@ +from gn2.base.trait import GeneralTrait +from gn2.base import data_set # import create_dataset + +from pprint import pformat as pf + +import string +import math +from decimal import Decimal +import random +import sys +import datetime +import os +import collections +import uuid + +import numpy as np + +import pickle as pickle +import itertools + +import simplejson as json + +from redis import Redis +Redis = Redis() + +from flask import Flask, g + +from gn2.base.trait import GeneralTrait +from gn2.base import data_set +from gn2.base import species +from gn2.base import webqtlConfig +from gn2.utility import webqtlUtil, helper_functions, hmac, Plot, Bunch, temp_data +from gn2.utility.redis_tools import get_redis_conn +from gn2.wqflask.database import database_connection +from gn2.wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping +from gn2.wqflask.show_trait.SampleList import SampleList + +from gn2.utility.tools import locate, get_setting, locate_ignore_error, GEMMA_COMMAND, PLINK_COMMAND, TEMPDIR +from gn2.utility.external import shell +from gn2.base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR + +Redis = get_redis_conn() + +class RunMapping: + + def __init__(self, start_vars, temp_uuid): + helper_functions.get_species_dataset_trait(self, start_vars) + + # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + self.temp_uuid = temp_uuid + + # ZS: Needed to zoom in or remap temp traits like PCA traits + if "temp_trait" in start_vars and start_vars['temp_trait'] != "False": + self.temp_trait = "True" + self.group = self.dataset.group.name + + self.hash_of_inputs = start_vars['hash_of_inputs'] + self.dataid = start_vars['dataid'] + + self.json_data = {} + self.json_data['lodnames'] = ['lod.hk'] + + # ZS: Sometimes a group may have a genofile that only includes a subset of samples + genofile_samplelist = [] + if 'genofile' in start_vars: + if start_vars['genofile'] != "": + self.genofile_string = start_vars['genofile'] + self.dataset.group.genofile = self.genofile_string.split(":")[ + 0] + genofile_samplelist = get_genofile_samplelist(self.dataset) + + all_samples_ordered = self.dataset.group.all_samples_ordered() + + self.vals = [] + self.samples = [] + self.sample_vals = start_vars['sample_vals'] + self.vals_hash = start_vars['vals_hash'] + sample_val_dict = json.loads(self.sample_vals) + samples = sample_val_dict.keys() + if (len(genofile_samplelist) != 0): + for sample in genofile_samplelist: + self.samples.append(sample) + if sample in samples: + self.vals.append(sample_val_dict[sample]) + else: + self.vals.append("x") + else: + for sample in self.dataset.group.samplelist: + if sample in samples: + self.vals.append(sample_val_dict[sample]) + self.samples.append(sample) + + if 'n_samples' in start_vars: + self.n_samples = start_vars['n_samples'] + else: + self.n_samples = len([val for val in self.vals if val != "x"]) + + # ZS: Check if genotypes exist in the DB in order to create links for markers + + self.geno_db_exists = geno_db_exists(self.dataset) + + self.mapping_method = start_vars['method'] + if "results_path" in start_vars: + self.mapping_results_path = start_vars['results_path'] + else: + mapping_results_filename = "_".join([self.dataset.group.name, self.mapping_method, self.vals_hash]).replace("/", "_") + self.mapping_results_path = "{}{}.csv".format( + webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) + + self.pair_scan = False + self.manhattan_plot = False + if 'manhattan_plot' in start_vars: + if start_vars['manhattan_plot'].lower() != "false": + self.color_scheme = "alternating" + if "color_scheme" in start_vars: + self.color_scheme = start_vars['color_scheme'] + if self.color_scheme == "single": + self.manhattan_single_color = start_vars['manhattan_single_color'] + self.manhattan_plot = True + + self.maf = start_vars['maf'] # Minor allele frequency + if "use_loco" in start_vars: + self.use_loco = start_vars['use_loco'] + else: + self.use_loco = None + self.suggestive = "" + self.significant = "" + if 'transform' in start_vars: + self.transform = start_vars['transform'] + else: + self.transform = "" + self.score_type = "LRS" # ZS: LRS or LOD + self.mapping_scale = "physic" + if "mapping_scale" in start_vars: + self.mapping_scale = start_vars['mapping_scale'] + self.num_perm = 0 + self.perm_output = [] + self.bootstrap_results = [] + self.covariates = start_vars['covariates'] if "covariates" in start_vars else "" + self.categorical_vars = [] + + # ZS: This is passed to GN1 code for single chr mapping + self.selected_chr = -1 + if "selected_chr" in start_vars: + # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + if int(start_vars['selected_chr']) != -1: + self.selected_chr = int(start_vars['selected_chr']) + 1 + else: + self.selected_chr = int(start_vars['selected_chr']) + if "startMb" in start_vars: + self.startMb = start_vars['startMb'] + if "endMb" in start_vars: + self.endMb = start_vars['endMb'] + if "graphWidth" in start_vars: + self.graphWidth = start_vars['graphWidth'] + if "lrsMax" in start_vars: + self.lrsMax = start_vars['lrsMax'] + if "haplotypeAnalystCheck" in start_vars: + self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck'] + if "startMb" in start_vars: # This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load + if "permCheck" in start_vars: + self.permCheck = "ON" + else: + self.permCheck = False + self.num_perm = int(start_vars['num_perm']) + + self.LRSCheck = start_vars['LRSCheck'] + + if "showSNP" in start_vars: + self.showSNP = start_vars['showSNP'] + else: + self.showSNP = False + + if "showHomology" in start_vars: + self.showHomology = start_vars['showHomology'] + else: + self.showHomology = False + + if "showGenes" in start_vars: + self.showGenes = start_vars['showGenes'] + else: + self.showGenes = False + + if "viewLegend" in start_vars: + self.viewLegend = start_vars['viewLegend'] + else: + self.viewLegend = False + else: + try: + if int(start_vars['num_perm']) > 0: + self.num_perm = int(start_vars['num_perm']) + except: + self.num_perm = 0 + + if self.num_perm > 0: + self.permCheck = "ON" + else: + self.permCheck = False + self.showSNP = "ON" + self.showGenes = "ON" + self.viewLegend = "ON" + + # self.dataset.group.get_markers() + if self.mapping_method == "gemma": + self.first_run = True + self.output_files = None + if 'output_files' in start_vars: + self.output_files = start_vars['output_files'] + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: + self.first_run = False + self.score_type = "-logP" + self.manhattan_plot = True + if self.use_loco == "True": + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + else: + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + results = marker_obs + elif self.mapping_method == "rqtl_plink": + results = self.run_rqtl_plink() + elif self.mapping_method == "rqtl_geno": + self.perm_strata = [] + if "perm_strata" in start_vars and "categorical_vars" in start_vars: + self.categorical_vars = start_vars["categorical_vars"].split( + ",") + if len(self.categorical_vars) and start_vars["perm_strata"] == "True": + primary_samples = SampleList(dataset=self.dataset, + sample_names=self.samples, + this_trait=self.this_trait) + + self.perm_strata = get_perm_strata( + self.this_trait, primary_samples, self.categorical_vars, self.samples) + self.score_type = "-logP" + self.control_marker = start_vars['control_marker'] + self.do_control = start_vars['do_control'] + if 'mapmethod_rqtl' in start_vars: + self.method = start_vars['mapmethod_rqtl'] + else: + self.method = "em" + self.model = start_vars['mapmodel_rqtl'] + self.pair_scan = False + if start_vars['pair_scan'] == "true": + self.pair_scan = True + if self.permCheck and self.num_perm > 0: + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl( + self.this_trait.name, self.vals, self.samples, self.dataset, self.pair_scan, self.mapping_scale, self.model, self.method, self.num_perm, self.perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates) + else: + results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.pair_scan, self.mapping_scale, self.model, self.method, + self.num_perm, self.perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates) + elif self.mapping_method == "reaper": + if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON + if "additiveCheck" in start_vars: + self.additiveCheck = start_vars['additiveCheck'] + else: + self.additiveCheck = False + + if "bootCheck" in start_vars: + self.bootCheck = "ON" + else: + self.bootCheck = False + self.num_bootstrap = int(start_vars['num_bootstrap']) + else: + self.additiveCheck = "ON" + try: + if int(start_vars['num_bootstrap']) > 0: + self.bootCheck = "ON" + self.num_bootstrap = int(start_vars['num_bootstrap']) + else: + self.bootCheck = False + self.num_bootstrap = 0 + except: + self.bootCheck = False + self.num_bootstrap = 0 + + self.control_marker = start_vars['control_marker'] + self.do_control = start_vars['do_control'] + + self.first_run = True + self.output_files = None + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: + self.first_run = False + if 'output_files' in start_vars: + self.output_files = start_vars['output_files'].split( + ",") + + results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(self.this_trait, + self.dataset, + self.samples, + self.vals, + self.json_data, + self.num_perm, + self.bootCheck, + self.num_bootstrap, + self.do_control, + self.control_marker, + self.manhattan_plot, + self.first_run, + self.output_files) + elif self.mapping_method == "plink": + self.score_type = "-logP" + self.manhattan_plot = True + results = plink_mapping.run_plink( + self.this_trait, self.dataset, self.species, self.vals, self.maf) + #results = self.run_plink() + + self.no_results = False + if len(results) == 0: + self.no_results = True + else: + if self.pair_scan == True: + self.figure_data = results[0] + self.table_data = results[1] + else: + self.qtl_results = [] + self.results_for_browser = [] + self.annotations_for_browser = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + marker['hmac'] = hmac.data_hmac('{}:{}'.format(marker['name'], self.dataset.group.name + "Geno")) + if 'Mb' in marker: + this_ps = marker['Mb'] * 1000000 + else: + this_ps = marker['cM'] * 1000000 + + browser_marker = dict( + chr=str(marker['chr']), + rs=marker['name'], + ps=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + + if self.geno_db_exists == "True": + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + else: + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps + ) + + if 'lrs_value' in marker and marker['lrs_value'] > 0: + browser_marker['p_wald'] = 10**- \ + (marker['lrs_value'] / 4.61) + elif 'lod_score' in marker and marker['lod_score'] > 0: + browser_marker['p_wald'] = 10**-(marker['lod_score']) + else: + browser_marker['p_wald'] = 0 + + self.results_for_browser.append(browser_marker) + self.annotations_for_browser.append(annot_marker) + if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + highest_chr = marker['chr'] + if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): + if 'Mb' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.6f}".format(marker['Mb']) + elif 'cM' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.3f}".format(marker['cM']) + else: + marker['display_pos'] = "N/A" + self.qtl_results.append(marker) + + total_markers = len(self.qtl_results) + export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, + self.mapping_method, self.mapping_scale, self.score_type, + self.transform, self.covariates, self.n_samples, self.vals_hash) + + if len(self.qtl_results) > 30000: + self.qtl_results = trim_markers_for_figure( + self.qtl_results) + self.results_for_browser = trim_markers_for_figure( + self.results_for_browser) + filtered_annotations = [] + for marker in self.results_for_browser: + for annot_marker in self.annotations_for_browser: + if annot_marker['rs'] == marker['rs']: + filtered_annotations.append(annot_marker) + break + self.annotations_for_browser = filtered_annotations + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + else: + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + + self.trimmed_markers = trim_markers_for_table(results) + + chr_lengths = get_chr_lengths( + self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) + + # ZS: For zooming into genome browser, need to pass chromosome name instead of number + if self.dataset.group.species == "mouse": + if self.selected_chr == 20: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + elif self.dataset.group.species == "rat": + if self.selected_chr == 21: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + else: + if self.selected_chr == 22: + this_chr = "X" + elif self.selected_chr == 23: + this_chr = "Y" + else: + this_chr = str(self.selected_chr) + + if self.mapping_method != "gemma": + if self.score_type == "LRS": + significant_for_browser = self.significant / 4.61 + else: + significant_for_browser = self.significant + + self.js_data = dict( + #result_score_type = self.score_type, + #this_trait = self.this_trait.name, + #data_set = self.dataset.name, + #maf = self.maf, + #manhattan_plot = self.manhattan_plot, + #mapping_scale = self.mapping_scale, + #chromosomes = chromosome_mb_lengths, + #qtl_results = self.qtl_results, + categorical_vars=self.categorical_vars, + chr_lengths=chr_lengths, + num_perm=self.num_perm, + perm_results=self.perm_output, + significant=significant_for_browser, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + else: + self.js_data = dict( + chr_lengths=chr_lengths, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + + def run_rqtl_plink(self): + # os.chdir("") never do this inside a webserver!! + + output_filename = webqtlUtil.genRandStr("%s_%s_" % ( + self.dataset.group.name, self.this_trait.name)) + + plink_mapping.gen_pheno_txt_file_plink( + self.this_trait, self.dataset, self.vals, pheno_filename=output_filename) + + rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % ( + self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) + + os.system(rqtl_command) + + count, p_values = self.parse_rqtl_output(plink_output_filename) + + def identify_empty_samples(self): + no_val_samples = [] + for sample_count, val in enumerate(self.vals): + if val == "x": + no_val_samples.append(sample_count) + return no_val_samples + + def trim_genotypes(self, genotype_data, no_value_samples): + trimmed_genotype_data = [] + for marker in genotype_data: + new_genotypes = [] + for item_count, genotype in enumerate(marker): + if item_count in no_value_samples: + continue + try: + genotype = float(genotype) + except ValueError: + genotype = np.nan + pass + new_genotypes.append(genotype) + trimmed_genotype_data.append(new_genotypes) + return trimmed_genotype_data + + +def export_mapping_results(dataset, trait, markers, results_path, mapping_method, mapping_scale, score_type, transform, covariates, n_samples, vals_hash): + if mapping_scale == "physic": + scale_string = "Mb" + else: + scale_string = "cM" + with open(results_path, "w+") as output_file: + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") + output_file.write("Data Set: " + dataset.fullname + "\n") + output_file.write("Trait: " + trait.display_name + "\n") + output_file.write("Trait Hash: " + vals_hash + "\n") + output_file.write("N Samples: " + str(n_samples) + "\n") + output_file.write("Mapping Tool: " + str(mapping_method) + "\n") + if len(transform) > 0: + transform_text = "Transform - " + if transform == "qnorm": + transform_text += "Quantile Normalized" + elif transform == "log2" or transform == "log10": + transform_text += transform.capitalize() + elif transform == "sqrt": + transform_text += "Square Root" + elif transform == "zscore": + transform_text += "Z-Score" + elif transform == "invert": + transform_text += "Invert +/-" + else: + transform_text = "" + output_file.write(transform_text + "\n") + if dataset.type == "ProbeSet": + if trait.symbol: + output_file.write("Gene Symbol: " + trait.symbol + "\n") + output_file.write("Location: " + str(trait.chr) + \ + " @ " + str(trait.mb) + " Mb\n") + if len(covariates) > 0: + output_file.write("Cofactors (dataset - trait):\n") + for covariate in covariates.split(","): + trait_name = covariate.split(":")[0] + dataset_name = covariate.split(":")[1] + output_file.write(dataset_name + " - " + trait_name + "\n") + output_file.write("\n") + output_file.write("Name,Chr,") + if score_type.lower() == "-logP": + score_type = "-logP" + output_file.write(scale_string + "," + score_type) + if "additive" in list(markers[0].keys()): + output_file.write(",Additive") + if "dominance" in list(markers[0].keys()): + output_file.write(",Dominance") + output_file.write("\n") + for i, marker in enumerate(markers): + output_file.write(marker['name'] + "," + str(marker['chr']) + ",") + output_file.write(str(marker[scale_string]) + ",") + if score_type == "-logP": + output_file.write(str(marker['lod_score'])) + else: + output_file.write(str(marker['lrs_value'])) + if "additive" in list(marker.keys()): + output_file.write("," + str(marker['additive'])) + if "dominance" in list(marker.keys()): + output_file.write("," + str(marker['dominance'])) + if i < (len(markers) - 1): + output_file.write("\n") + + +def trim_markers_for_figure(markers): + if 'p_wald' in list(markers[0].keys()): + score_type = 'p_wald' + elif 'lod_score' in list(markers[0].keys()): + score_type = 'lod_score' + else: + score_type = 'lrs_value' + + filtered_markers = [] + low_counter = 0 + med_counter = 0 + high_counter = 0 + for marker in markers: + if score_type == 'p_wald': + if marker[score_type] > 0.1: + if low_counter % 20 == 0: + filtered_markers.append(marker) + low_counter += 1 + elif 0.1 >= marker[score_type] > 0.01: + if med_counter % 10 == 0: + filtered_markers.append(marker) + med_counter += 1 + elif 0.01 >= marker[score_type] > 0.001: + if high_counter % 2 == 0: + filtered_markers.append(marker) + high_counter += 1 + else: + filtered_markers.append(marker) + elif score_type == 'lod_score': + if marker[score_type] < 1: + if low_counter % 20 == 0: + filtered_markers.append(marker) + low_counter += 1 + elif 1 <= marker[score_type] < 2: + if med_counter % 10 == 0: + filtered_markers.append(marker) + med_counter += 1 + elif 2 <= marker[score_type] <= 3: + if high_counter % 2 == 0: + filtered_markers.append(marker) + high_counter += 1 + else: + filtered_markers.append(marker) + else: + if marker[score_type] < 4.61: + if low_counter % 20 == 0: + filtered_markers.append(marker) + low_counter += 1 + elif 4.61 <= marker[score_type] < (2 * 4.61): + if med_counter % 10 == 0: + filtered_markers.append(marker) + med_counter += 1 + elif (2 * 4.61) <= marker[score_type] <= (3 * 4.61): + if high_counter % 2 == 0: + filtered_markers.append(marker) + high_counter += 1 + else: + filtered_markers.append(marker) + return filtered_markers + + +def trim_markers_for_table(markers): + if 'lod_score' in list(markers[0].keys()): + sorted_markers = sorted( + markers, key=lambda k: k['lod_score'], reverse=True) + else: + sorted_markers = sorted( + markers, key=lambda k: k['lrs_value'], reverse=True) + + #ZS: So we end up with a list of just 2000 markers + if len(sorted_markers) >= 25000: + trimmed_sorted_markers = sorted_markers[:25000] + return trimmed_sorted_markers + else: + return sorted_markers + + +def write_input_for_browser(this_dataset, gwas_results, annotations): + file_base = this_dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + gwas_filename = file_base + "_GWAS" + annot_filename = file_base + "_ANNOT" + gwas_path = "{}/gn2/".format(TEMPDIR) + gwas_filename + annot_path = "{}/gn2/".format(TEMPDIR) + annot_filename + + with open(gwas_path + ".json", "w") as gwas_file, open(annot_path + ".json", "w") as annot_file: + gwas_file.write(json.dumps(gwas_results)) + annot_file.write(json.dumps(annotations)) + + return [gwas_filename, annot_filename] + + +def geno_db_exists(this_dataset): + geno_db_name = this_dataset.group.name + "Geno" + try: + geno_db = data_set.create_dataset( + dataset_name=geno_db_name, get_samplelist=False) + return "True" + except: + return "False" + + +def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): + chr_lengths = [] + if mapping_scale == "physic": + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as db_cursor: + for i, the_chr in enumerate(dataset.species.chromosomes.chromosomes(db_cursor)): + this_chr = { + "chr": dataset.species.chromosomes.chromosomes(db_cursor)[the_chr].name, + "size": str(dataset.species.chromosomes.chromosomes(db_cursor)[the_chr].length) + } + chr_lengths.append(this_chr) + else: + this_chr = 1 + highest_pos = 0 + for i, result in enumerate(qtl_results): + chr_as_num = 0 + try: + chr_as_num = int(result['chr']) + except: + chr_as_num = 20 + if chr_as_num > this_chr or i == (len(qtl_results) - 1): + if i == (len(qtl_results) - 1): + if mapping_method == "reaper": + highest_pos = float(result['cM']) * 1000000 + else: + highest_pos = float(result['Mb']) * 1000000 + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) + else: + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) + this_chr = chr_as_num + else: + if mapping_method == "reaper": + if float(result['cM']) > highest_pos: + highest_pos = float(result['cM']) * 1000000 + else: + if float(result['Mb']) > highest_pos: + highest_pos = float(result['Mb']) * 1000000 + + return chr_lengths + + +def get_genofile_samplelist(dataset): + genofile_samplelist = [] + + genofile_json = dataset.group.get_genofiles() + for genofile in genofile_json: + if genofile['location'] == dataset.group.genofile and 'sample_list' in genofile: + genofile_samplelist = genofile['sample_list'] + + return genofile_samplelist + + +def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): + perm_strata_strings = [] + for sample in used_samples: + if sample in list(sample_list.sample_attribute_values.keys()): + combined_string = "" + for var in categorical_vars: + if var in sample_list.sample_attribute_values[sample]: + combined_string += str( + sample_list.sample_attribute_values[sample][var]) + else: + combined_string += "NA" + else: + combined_string = "NA" + + perm_strata_strings.append(combined_string) + + d = dict([(y, x + 1) + for x, y in enumerate(sorted(set(perm_strata_strings)))]) + list_to_numbers = [d[x] for x in perm_strata_strings] + perm_strata = list_to_numbers + + return perm_strata |