From 8aeff9b91d078a40a50d13f6393a1f1dabf62aa4 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 18 Jan 2013 16:58:28 -0600 Subject: Renamed CorrelationPage.py to show_corr_results.py Worked with correlation code; got to the code that begins to do the actual correlations Created a function "get_dataset_and_trait" in the new file "helper_functions.py" because the code initializing the dataset and trait objects was repeated in multiple places --- wqflask/base/data_set.py | 1 + wqflask/base/trait.py | 42 +- wqflask/utility/helper_functions.py | 15 + wqflask/wqflask/correlation/CorrelationPage.py | 2082 ------------------- wqflask/wqflask/correlation/show_corr_results.py | 2107 ++++++++++++++++++++ .../wqflask/marker_regression/marker_regression.py | 13 +- wqflask/wqflask/show_trait/show_trait.py | 35 +- wqflask/wqflask/views.py | 6 +- 8 files changed, 2169 insertions(+), 2132 deletions(-) create mode 100644 wqflask/utility/helper_functions.py delete mode 100644 wqflask/wqflask/correlation/CorrelationPage.py create mode 100644 wqflask/wqflask/correlation/show_corr_results.py (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 50ef8f57..7088913c 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -741,3 +741,4 @@ def geno_mrna_confidentiality(ob): if confidential: # Allow confidential data later NoConfindetialDataForYouTodaySorry + diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 241bf2ab..2af4bc24 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -314,27 +314,27 @@ class GeneralTrait: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. #XZ: The geneid values in database should be cleaned up. - try: - junk = float(self.geneid) - geneidIsNumber = 1 - except: - geneidIsNumber = 0 - - if geneidIsNumber: - query = """ - SELECT - HomologeneId - FROM - Homologene, Species, InbredSet - WHERE - Homologene.GeneId =%s AND - InbredSet.Name = '%s' AND - InbredSet.SpeciesId = Species.Id AND - Species.TaxonomyId = Homologene.TaxonomyId - """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) - result = g.db.execute(query).fetchone() - else: - result = None + #try: + # float(self.geneid) + # geneidIsNumber = True + #except ValueError: + # geneidIsNumber = False + + #if geneidIsNumber: + query = """ + SELECT + HomologeneId + FROM + Homologene, Species, InbredSet + WHERE + Homologene.GeneId =%s AND + InbredSet.Name = '%s' AND + InbredSet.SpeciesId = Species.Id AND + Species.TaxonomyId = Homologene.TaxonomyId + """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) + result = g.db.execute(query).fetchone() + #else: + # result = None if result: self.homologeneid = result[0] diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py new file mode 100644 index 00000000..920d9ac6 --- /dev/null +++ b/wqflask/utility/helper_functions.py @@ -0,0 +1,15 @@ +from __future__ import absolute_import, print_function, division + +from base.trait import GeneralTrait +from base import data_set + +def get_dataset_and_trait(self, start_vars): + #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" + self.dataset = data_set.create_dataset(start_vars['dataset']) + self.this_trait = GeneralTrait(dataset=self.dataset.name, + name=start_vars['trait_id'], + cellid=None) + + #if read_genotype: + self.dataset.group.read_genotype_file() + self.genotype = self.dataset.group.genotype \ No newline at end of file diff --git a/wqflask/wqflask/correlation/CorrelationPage.py b/wqflask/wqflask/correlation/CorrelationPage.py deleted file mode 100644 index f1dd96ef..00000000 --- a/wqflask/wqflask/correlation/CorrelationPage.py +++ /dev/null @@ -1,2082 +0,0 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by NL 2011/02/11 -# Last updated by Christian Fernandez 2012/04/07 -# Refactored correlation calculation into smaller functions in preparation of -# separating html from existing code - -from __future__ import print_function - -import string -from math import * -import cPickle -import os -import time -#import pyXLWriter as xl -import pp -import math - -from pprint import pformat as pf - -from htmlgen import HTMLgen2 as HT -import reaper - -from base import webqtlConfig -from utility.THCell import THCell -from utility.TDCell import TDCell -from base.webqtlTrait import GeneralTrait -from base.data_set import create_dataset -from base.templatePage import templatePage -from utility import webqtlUtil -from dbFunction import webqtlDatabaseFunction -import utility.webqtlUtil #this is for parallel computing only. -import correlationFunction - - -METHOD_SAMPLE_PEARSON = "1" -METHOD_SAMPLE_RANK = "2" -METHOD_LIT = "3" -METHOD_TISSUE_PEARSON = "4" -METHOD_TISSUE_RANK = "5" - -TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] - -TISSUE_MOUSE_DB = 1 - -class AuthException(Exception): pass - - -class Trait(object): - - - def __init__(self, name, raw_values = None, lit_corr = None, tissue_corr = None, p_tissue = None): - self.name = name - self.raw_values = raw_values - self.lit_corr = lit_corr - self.tissue_corr = tissue_corr - self.p_tissue = p_tissue - self.correlation = 0 - self.p_value = 0 - - @staticmethod - def from_csv(line, data_start = 1): - name = line[0] - numbers = line[data_start:] - # _log.info(numbers) - numbers = [ float(number) for number in numbers ] - - return Trait(name, raw_values = numbers) - - def calculate_correlation(self, values, method): - """Calculate the correlation value and p value according to the method specified""" - - #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for - #There's probably a better way of dealing with this, but I'll have to ask Christian - updated_raw_values = [] - updated_values = [] - for i in range(len(values)): - if values[i] != "None": - updated_raw_values.append(self.raw_values[i]) - updated_values.append(values[i]) - - self.raw_values = updated_raw_values - values = updated_values - - if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON: - corr,nOverlap = webqtlUtil.calCorrelation(self.raw_values, values, len(values)) - else: - corr,nOverlap = webqtlUtil.calCorrelationRank(self.raw_values, values, len(values)) - - self.correlation = corr - self.overlap = nOverlap - - if self.overlap < 3: - self.p_value = 1.0 - else: - #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to br prevented. - if abs(self.correlation) >= 1.0: - self.p_value = 0.0 - else: - ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation)) - ZValue = ZValue*sqrt(self.overlap-3) - self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) - - - -#XZ, 01/14/2009: This method is for parallel computing only. -#XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1) -#XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected -def compute_corr( input_nnCorr, input_trait, input_list, computing_method): - - allcorrelations = [] - - for line in input_list: - tokens = line.split('","') - tokens[-1] = tokens[-1][:-2] #remove the last " - tokens[0] = tokens[0][1:] #remove the first " - - traitdataName = tokens[0] - database_trait = tokens[1:] - - if computing_method == "1": #XZ: Pearson's r - corr,nOverlap = utility.webqtlUtil.calCorrelationText(input_trait, database_trait, input_nnCorr) - else: #XZ: Spearman's rho - corr,nOverlap = utility.webqtlUtil.calCorrelationRankText(input_trait, database_trait, input_nnCorr) - traitinfo = [traitdataName,corr,nOverlap] - allcorrelations.append(traitinfo) - - return allcorrelations - -def get_correlation_method_key(form_data): - #XZ, 09/28/2008: if user select "1", then display 1, 3 and 4. - #XZ, 09/28/2008: if user select "2", then display 2, 3 and 5. - #XZ, 09/28/2008: if user select "3", then display 1, 3 and 4. - #XZ, 09/28/2008: if user select "4", then display 1, 3 and 4. - #XZ, 09/28/2008: if user select "5", then display 2, 3 and 5. - - method = form_data.method - if method not in ["1", "2", "3" ,"4", "5"]: - return "1" - - return method - - -def get_custom_trait(form_data, cursor): - """Pulls the custom trait, if it exists, out of the form data""" - trait_name = form_data.fullname - - if trait_name: - trait = webqtlTrait(fullname=trait_name, cursor=cursor) - trait.retrieveInfo() - return trait - else: - return None - - -#XZ, 09/18/2008: get the information such as value, variance of the input strain names from the form. -def get_sample_data(fd): - #print("fd is:", pf(fd.__dict__)) - if fd.allstrainlist: - mdpchoice = fd.MDPChoice - #XZ, in HTML source code, it is "BXD Only", "BXH Only", and so on - if mdpchoice == "1": - strainlist = fd.f1list + fd.strainlist - #XZ, in HTML source code, it is "Non-BXD Only", "Non-BXD Only", etc - elif mdpchoice == "2": - strainlist = [] - strainlist2 = fd.f1list + fd.strainlist - for strain in fd.allstrainlist: - if strain not in strainlist2: - strainlist.append(strain) - #So called MDP Panel - if strainlist: - strainlist = fd.f1list + fd.parlist+strainlist - #XZ, in HTML source code, it is "All Cases" - else: - strainlist = fd.allstrainlist - #XZ, 09/18/2008: put the trait data into dictionary fd.allTraitData - fd.readData(fd.allstrainlist) - else: - mdpchoice = None - strainlist = fd.strainlist - #XZ, 09/18/2008: put the trait data into dictionary fd.allTraitData - fd.readData() - - return strainlist - - - -def get_species(fd, cursor): - #XZ, 3/16/2010: variable RISet must be pass by the form - RISet = fd.RISet - #XZ, 12/12/2008: get species infomation - species = webqtlDatabaseFunction.retrieveSpecies(cursor=cursor, RISet=RISet) - return species - - -def sortTraitCorrelations(traits, method="1"): - if method in TISSUE_METHODS: - traits.sort(key=lambda trait: trait.tissue_corr != None and abs(trait.tissue_corr), reverse=True) - elif method == METHOD_LIT: - traits.sort(key=lambda trait: trait.lit_corr != None and abs(trait.lit_corr), reverse=True) - else: - traits.sort(key=lambda trait: trait.correlation != None and abs(trait.correlation), reverse=True) - - return traits - - -def auth_user_for_db(db, cursor, target_db_name, privilege, username): - """Authorize a user for access to a database if that database is - confidential. A db (identified by a record in ProbeSetFreeze) contains a - list of authorized users who may access it, as well as its confidentiality - level. - - If the current user's privilege level is greater than 'user', ie: root or - admin, then they are automatically authed, otherwise, check the - AuthorizedUsers field for the presence of their name.""" - - if db.type == 'ProbeSet': - cursor.execute('SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name) - indId, indName, indFullName, confidential, AuthorisedUsers = cursor.fetchall()[0] - - if confidential: - authorized = 0 - - #for the dataset that confidentiality is 1 - #1. 'admin' and 'root' can see all of the dataset - #2. 'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table) - if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: - authorized = 1 - else: - if username in AuthorisedUsers.split(","): - authorized = 1 - - if not authorized: - raise AuthException("The %s database you selected is not open to the public at this time, please go back and select other database." % indFullName) - - -class CorrelationPage(templatePage): - - corr_min_informative = 4 - - PAGE_HEADING = "Correlation Table" - #CORRELATION_METHODS = {"1" : "Genetic Correlation (Pearson's r)", - # "2" : "Genetic Correlation (Spearman's rho)", - # "3" : "SGO Literature Correlation", - # "4" : "Tissue Correlation (Pearson's r)", - # "5" : "Tissue Correlation (Spearman's rho)"} - # - #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1} - - - def error(self, message, *args, **kw): - heading = heading or self.PAGE_HEADING - return templatePage.error(heading = heading, detail = [message], error=error) - - def __init__(self, fd): - #print("in CorrelationPage __init__ fd is:", pf(fd.__dict__)) - # Call the superclass constructor - - # Put everything in fd into self - self.__dict__.update(fd.__dict__) - - templatePage.__init__(self, fd) - - #print("in CorrelationPage __init__ now fd is:", pf(fd.__dict__)) - # Connect to the database - if not self.openMysql(): - return - - # Read the genotype from a file - if not fd.genotype: - fd.readGenotype() - - sample_list = get_sample_data(fd) - print("sample_list is", pf(sample_list)) - - # Whether the user chose BXD Only, Non-BXD Only, or All Strains - # (replace BXD with whatever the group/inbredset name is) - # "mdp" stands for "mouse diversity panel" This is outdated; it now represents any - # cases/strains from the non-primary group - mdp_choice = fd.MDPChoice if fd.allstrainlist else None - - self.species = get_species(fd, self.cursor) - - #XZ, 09/18/2008: get all information about the user selected database. - #target_db_name = fd.corr_dataset - self.target_db_name = fd.corr_dataset - - #try: - #print("target_db_name is:", target_db_name) - self.db = create_dataset(self.db_conn, self.target_db_name) - #except: - # detail = ["The database you just requested has not been established yet."] - # self.error(detail) - # return - - # Auth if needed - try: - auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName) - except AuthException as e: - detail = [e.message] - return self.error(detail) - - #XZ, 09/18/2008: filter out the strains that have no value. - self.sample_names, vals, vars, N = fd.informativeStrains(sample_list) - - print("samplenames is:", pf(self.sample_names)) - #CF - If less than a minimum number of strains/cases in common, don't calculate anything - if len(self.sample_names) < self.corr_min_informative: - detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)] - self.error(heading=None, detail=detail) - - for key, value in self.__dict__.items(): - if key.startswith("corr"): - print("[red] %s - %s" % (key, value)) - - #correlation_method = self.CORRELATION_METHODS[self.method] - #rankOrder = self.RANK_ORDERS[self.method] - - # CF - Number of results returned - # Todo: Get rid of self.returnNumber - self.returnNumber = self.corr_return_results - - self.record_count = 0 - - myTrait = get_custom_trait(fd, self.cursor) - - - # We will not get Literature Correlations if there is no GeneId because there is nothing - # to look against - self.gene_id = int(fd.GeneId) - - # We will not get Tissue Correlations if there is no gene symbol because there is nothing to look against - self.trait_symbol = myTrait.symbol - - - #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid - self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.species, self.gene_id) - - #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' - self.tissue_probeset_freeze_id = 1 - - traitList = self.correlate(vals) - - _log.info("Done doing correlation calculation") - -############################################################################################################################################ - - TD_LR = HT.TD(height=200,width="100%",bgColor='#eeeeee') - - mainfmName = webqtlUtil.genRandStr("fm_") - form = HT.Form(cgi = os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), - enctype='multipart/form-data', name= mainfmName, submit=HT.Input(type='hidden')) - hddn = {'FormID': 'showDatabase', - 'ProbeSetID': '_', - 'database': self.target_db_name, - 'databaseFull': self.db.fullname, - 'CellID': '_', - 'RISet': fd.RISet, - 'identification': fd.identification} - - if myTrait: - hddn['fullname'] = fd.fullname - if mdp_choice: - hddn['MDPChoice']= mdp_choice - - - #XZ, 09/18/2008: pass the trait data to next page by hidden parameters. - webqtlUtil.exportData(hddn, fd.allTraitData) - - if fd.incparentsf1: - hddn['incparentsf1']='ON' - - if fd.allstrainlist: - hddn['allstrainlist'] = string.join(fd.allstrainlist, ' ') - - - for key in hddn.keys(): - form.append(HT.Input(name=key, value=hddn[key], type='hidden')) - - #XZ, 11/21/2008: add two parameters to form - form.append(HT.Input(name="X_geneSymbol", value="", type='hidden')) - form.append(HT.Input(name="Y_geneSymbol", value="", type='hidden')) - - #XZ, 3/11/2010: add one parameter to record if the method is rank order. - form.append(HT.Input(name="rankOrder", value="%s" % rankOrder, type='hidden')) - - form.append(HT.Input(name="TissueProbeSetFreezeId", value="%s" % self.tissue_probeset_freeze_id, type='hidden')) - - #################################### - # generate the info on top of page # - #################################### - - info = self.getTopInfo(myTrait=myTrait, method=self.method, db=self.db, target_db_name=self.target_db_name, returnNumber=self.returnNumber, methodDict=self.CORRELATION_METHODS, totalTraits=traitList, identification=fd.identification ) - - ############## - # Excel file # - ############## - filename= webqtlUtil.genRandStr("Corr_") - xlsUrl = HT.Input(type='button', value = 'Download Table', onClick= "location.href='/tmp/%s.xls'" % filename, Class='button') - # Create a new Excel workbook - workbook = xl.Writer('%s.xls' % (webqtlConfig.TMPDIR+filename)) - headingStyle = workbook.add_format(align = 'center', bold = 1, border = 1, size=13, fg_color = 0x1E, color="white") - - #XZ, 3/18/2010: pay attention to the line number of header in this file. As of today, there are 7 lines. - worksheet = self.createExcelFileWithTitleAndFooter(workbook=workbook, identification=fd.identification, db=self.db, returnNumber=self.returnNumber) - - newrow = 7 - - -##################################################################### - - - #Select All, Deselect All, Invert Selection, Add to Collection - mintmap = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'showIntMap');" % mainfmName) - mintmap_img = HT.Image("/images/multiple_interval_mapping1_final.jpg", name='mintmap', alt="Multiple Interval Mapping", title="Multiple Interval Mapping", style="border:none;") - mintmap.append(mintmap_img) - mcorr = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'compCorr');" % mainfmName) - mcorr_img = HT.Image("/images/compare_correlates2_final.jpg", alt="Compare Correlates", title="Compare Correlates", style="border:none;") - mcorr.append(mcorr_img) - cormatrix = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'corMatrix');" % mainfmName) - cormatrix_img = HT.Image("/images/correlation_matrix1_final.jpg", alt="Correlation Matrix and PCA", title="Correlation Matrix and PCA", style="border:none;") - cormatrix.append(cormatrix_img) - networkGraph = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'networkGraph');" % mainfmName) - networkGraph_img = HT.Image("/images/network_graph1_final.jpg", name='mintmap', alt="Network Graphs", title="Network Graphs", style="border:none;") - networkGraph.append(networkGraph_img) - heatmap = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'heatmap');" % mainfmName) - heatmap_img = HT.Image("/images/heatmap2_final.jpg", name='mintmap', alt="QTL Heat Map and Clustering", title="QTL Heatmap and Clustering", style="border:none;") - heatmap.append(heatmap_img) - partialCorr = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'partialCorrInput');" % mainfmName) - partialCorr_img = HT.Image("/images/partial_correlation_final.jpg", name='partialCorr', alt="Partial Correlation", title="Partial Correlation", style="border:none;") - partialCorr.append(partialCorr_img) - addselect = HT.Href(url="#redirect", onClick="addRmvSelection('%s', document.getElementsByName('%s')[0], 'addToSelection');" % (fd.RISet, mainfmName)) - addselect_img = HT.Image("/images/add_collection1_final.jpg", name="addselect", alt="Add To Collection", title="Add To Collection", style="border:none;") - addselect.append(addselect_img) - selectall = HT.Href(url="#redirect", onClick="checkAll(document.getElementsByName('%s')[0]);" % mainfmName) - selectall_img = HT.Image("/images/select_all2_final.jpg", name="selectall", alt="Select All", title="Select All", style="border:none;") - selectall.append(selectall_img) - selectinvert = HT.Href(url="#redirect", onClick = "checkInvert(document.getElementsByName('%s')[0]);" % mainfmName) - selectinvert_img = HT.Image("/images/invert_selection2_final.jpg", name="selectinvert", alt="Invert Selection", title="Invert Selection", style="border:none;") - selectinvert.append(selectinvert_img) - reset = HT.Href(url="#redirect", onClick="checkNone(document.getElementsByName('%s')[0]); return false;" % mainfmName) - reset_img = HT.Image("/images/select_none2_final.jpg", alt="Select None", title="Select None", style="border:none;") - reset.append(reset_img) - selecttraits = HT.Input(type='button' ,name='selecttraits',value='Select Traits', onClick="checkTraits(this.form);",Class="button") - selectgt = HT.Input(type='text' ,name='selectgt',value='-1.0', size=6,maxlength=10,onChange="checkNumeric(this,1.0,'-1.0','gthan','greater than filed')") - selectlt = HT.Input(type='text' ,name='selectlt',value='1.0', size=6,maxlength=10,onChange="checkNumeric(this,-1.0,'1.0','lthan','less than field')") - selectandor = HT.Select(name='selectandor') - selectandor.append(('AND','and')) - selectandor.append(('OR','or')) - selectandor.selected.append('AND') - - - #External analysis tools - GCATButton = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'GCAT');" % mainfmName) - GCATButton_img = HT.Image("/images/GCAT_logo_final.jpg", name="GCAT", alt="GCAT", title="GCAT", style="border:none") - GCATButton.append(GCATButton_img) - - ODE = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'ODE');" % mainfmName) - ODE_img = HT.Image("/images/ODE_logo_final.jpg", name="ode", alt="ODE", title="ODE", style="border:none") - ODE.append(ODE_img) - - ''' - #XZ, 07/07/2010: I comment out this block of code. - WebGestaltScript = HT.Script(language="Javascript") - WebGestaltScript.append(""" -setTimeout('openWebGestalt()', 2000); -function openWebGestalt(){ -var thisForm = document['WebGestalt']; -makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); -} - """ % (mainfmName, len(traitList))) - ''' - - self.cursor.execute('SELECT GeneChip.GO_tree_value FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and ProbeSetFreeze.Name = "%s"' % self.db.name) - result = self.cursor.fetchone() - - if result: - GO_tree_value = result[0] - - if GO_tree_value: - - WebGestalt = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'GOTree');" % mainfmName) - WebGestalt_img = HT.Image("/images/webgestalt_icon_final.jpg", name="webgestalt", alt="Gene Set Analysis Toolkit", title="Gene Set Analysis Toolkit", style="border:none") - WebGestalt.append(WebGestalt_img) - - hddnWebGestalt = { - 'id_list':'', - 'correlation':'', - 'id_value':'', - 'llid_list':'', - 'id_type':GO_tree_value, - 'idtype':'', - 'species':'', - 'list':'', - 'client':''} - - hddnWebGestalt['ref_type'] = hddnWebGestalt['id_type'] - hddnWebGestalt['cat_type'] = 'GO' - hddnWebGestalt['significancelevel'] = 'Top10' - - if self.species == 'rat': - hddnWebGestalt['org'] = 'Rattus norvegicus' - elif self.species == 'human': - hddnWebGestalt['org'] = 'Homo sapiens' - elif self.species == 'mouse': - hddnWebGestalt['org'] = 'Mus musculus' - else: - hddnWebGestalt['org'] = '' - - for key in hddnWebGestalt.keys(): - form.append(HT.Input(name=key, value=hddnWebGestalt[key], type='hidden')) - - - #Create tables with options, etc - - pageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%", border=0, align="Left") - - containerTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="90%",border=0, align="Left") - - - if not GO_tree_value: - optionsTable = HT.TableLite(cellSpacing=2, cellPadding=0,width="480", height="80", border=0, align="Left") - optionsTable.append(HT.TR(HT.TD(selectall), HT.TD(reset), HT.TD(selectinvert), HT.TD(addselect), HT.TD(GCATButton), HT.TD(ODE), align="left")) - optionsTable.append(HT.TR(HT.TD(" "*1,"Select"), HT.TD("Deselect"), HT.TD(" "*1,"Invert"), HT.TD(" "*3,"Add"), HT.TD("Gene Set"), HT.TD(" "*2,"GCAT"))) - else: - optionsTable = HT.TableLite(cellSpacing=2, cellPadding=0,width="560", height="80", border=0, align="Left") - optionsTable.append(HT.TR(HT.TD(selectall), HT.TD(reset), HT.TD(selectinvert), HT.TD(addselect), HT.TD(GCATButton), HT.TD(ODE), HT.TD(WebGestalt), align="left")) - optionsTable.append(HT.TR(HT.TD(" "*1,"Select"), HT.TD("Deselect"), HT.TD(" "*1,"Invert"), HT.TD(" "*3,"Add"), HT.TD("Gene Set"), HT.TD(" "*2,"GCAT"), HT.TD(" "*3, "ODE"))) - containerTable.append(HT.TR(HT.TD(optionsTable))) - - functionTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="480",height="80", border=0, align="Left") - functionRow = HT.TR(HT.TD(networkGraph, width="16.7%"), HT.TD(cormatrix, width="16.7%"), HT.TD(partialCorr, width="16.7%"), HT.TD(mcorr, width="16.7%"), HT.TD(mintmap, width="16.7%"), HT.TD(heatmap), align="left") - labelRow = HT.TR(HT.TD(" "*1,HT.Text("Graph")), HT.TD(" "*1,HT.Text("Matrix")), HT.TD(" "*1,HT.Text("Partial")), HT.TD(HT.Text("Compare")), HT.TD(HT.Text("QTL Map")), HT.TD(HT.Text(text="Heat Map"))) - functionTable.append(functionRow, labelRow) - containerTable.append(HT.TR(HT.TD(functionTable), HT.BR())) - - #more_options = HT.Image("/images/more_options1_final.jpg", name='more_options', alt="Expand Options", title="Expand Options", style="border:none;", Class="toggleShowHide") - - #containerTable.append(HT.TR(HT.TD(more_options, HT.BR(), HT.BR()))) - - moreOptions = HT.Input(type='button',name='options',value='More Options', onClick="",Class="toggle") - fewerOptions = HT.Input(type='button',name='options',value='Fewer Options', onClick="",Class="toggle") - - """ - if (fd.formdata.getvalue('showHideOptions') == 'less'): - containerTable.append(HT.TR(HT.TD(" "), height="10"), HT.TR(HT.TD(HT.Div(fewerOptions, Class="toggleShowHide")))) - containerTable.append(HT.TR(HT.TD(" "))) - else: - containerTable.append(HT.TR(HT.TD(" "), height="10"), HT.TR(HT.TD(HT.Div(moreOptions, Class="toggleShowHide")))) - containerTable.append(HT.TR(HT.TD(" "))) - """ - - containerTable.append(HT.TR(HT.TD(HT.Span(selecttraits,' with r > ',selectgt, ' ',selectandor, ' r < ',selectlt,Class="bd1 cbddf fs11")), style="display:none;", Class="extra_options")) - - chrMenu = HT.Input(type='hidden',name='chromosomes',value='all') - - corrHeading = HT.Paragraph('Correlation Table', Class="title") - - - tblobj = {} - - if self.db.type=="Geno": - containerTable.append(HT.TR(HT.TD(xlsUrl, height=60))) - - pageTable.append(HT.TR(HT.TD(containerTable))) - - tblobj['header'], worksheet = self.getTableHeaderForGeno( method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) - newrow += 1 - - sortby = self.getSortByValue( calculationMethod = self.method ) - - corrScript = HT.Script(language="Javascript") - corrScript.append("var corrArray = new Array();") - - tblobj['body'], worksheet, corrScript = self.getTableBodyForGeno(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript) - - workbook.close() - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - - div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), corrScript, Id="sortable") - - pageTable.append(HT.TR(HT.TD(div))) - - form.append(HT.Input(name='ShowStrains',type='hidden', value =1), - HT.Input(name='ShowLine',type='hidden', value =1), - HT.P(), HT.P(), pageTable) - TD_LR.append(corrHeading, info, form, HT.P()) - - self.dict['body'] = str(TD_LR) - self.dict['js1'] = '' - self.dict['title'] = 'Correlation' - - elif self.db.type=="Publish": - - containerTable.append(HT.TR(HT.TD(xlsUrl, height=40))) - - pageTable.append(HT.TR(HT.TD(containerTable))) - - tblobj['header'], worksheet = self.getTableHeaderForPublish(method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) - newrow += 1 - - sortby = self.getSortByValue( calculationMethod = self.method ) - - corrScript = HT.Script(language="Javascript") - corrScript.append("var corrArray = new Array();") - - tblobj['body'], worksheet, corrScript = self.getTableBodyForPublish(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript, species=self.species) - - workbook.close() - - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - # NL, 07/27/2010. genTableObj function has been moved from templatePage.py to webqtlUtil.py; - div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), corrScript, Id="sortable") - - pageTable.append(HT.TR(HT.TD(div))) - - form.append( - HT.Input(name='ShowStrains',type='hidden', value =1), - HT.Input(name='ShowLine',type='hidden', value =1), - HT.P(), pageTable) - TD_LR.append(corrHeading, info, form, HT.P()) - - self.dict['body'] = str(TD_LR) - self.dict['js1'] = '' - self.dict['title'] = 'Correlation' - - - elif self.db.type=="ProbeSet": - tblobj['header'], worksheet = self.getTableHeaderForProbeSet(method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) - newrow += 1 - - sortby = self.getSortByValue( calculationMethod = self.method ) - - corrScript = HT.Script(language="Javascript") - corrScript.append("var corrArray = new Array();") - - tblobj['body'], worksheet, corrScript = self.getTableBodyForProbeSet(traitList=traitList, primaryTrait=myTrait, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript, species=self.species) - - workbook.close() - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - - #XZ: here is the table of traits - div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1", hiddenColumns=["Gene ID","Homologene ID"]), corrScript, Id="sortable") - - - #XZ, 01/12/2009: create database menu for 'Add Correlation' - self.cursor.execute(""" - select - ProbeSetFreeze.FullName, ProbeSetFreeze.Id, Tissue.name - from - ProbeSetFreeze, ProbeFreeze, ProbeSetFreeze as ps2, ProbeFreeze as p2, Tissue - where - ps2.Id = %d - and ps2.ProbeFreezeId = p2.Id - and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id - and (ProbeFreeze.InbredSetId = p2.InbredSetId or (ProbeFreeze.InbredSetId in (1, 3) and p2.InbredSetId in (1, 3))) - and p2.ChipId = ProbeFreeze.ChipId - and ps2.Id != ProbeSetFreeze.Id - and ProbeFreeze.TissueId = Tissue.Id - and ProbeSetFreeze.public > %d - order by - ProbeFreeze.TissueId, ProbeSetFreeze.CreateTime desc - """ % (self.db.id, webqtlConfig.PUBLICTHRESH)) - - results = self.cursor.fetchall() - dbCustomizer = HT.Select(results, name = "customizer") - databaseMenuSub = preTissue = "" - for item in results: - TName, TId, TTissue = item - if TTissue != preTissue: - if databaseMenuSub: - dbCustomizer.append(databaseMenuSub) - databaseMenuSub = HT.Optgroup(label = '%s mRNA ------' % TTissue) - preTissue = TTissue - - databaseMenuSub.append(item[:2]) - if databaseMenuSub: - dbCustomizer.append(databaseMenuSub) - - #updated by NL. Delete function generateJavaScript, move js files to dhtml.js, webqtl.js and jqueryFunction.js - #variables: filename, strainIds and vals are required by getquerystring function - strainIds=self.getStrainIds(species=self.species, strains=self.sample_names) - var1 = HT.Input(name="filename", value=filename, type='hidden') - var2 = HT.Input(name="strainIds", value=strainIds, type='hidden') - var3 = HT.Input(name="vals", value=vals, type='hidden') - customizerButton = HT.Input(type="button", Class="button", value="Add Correlation", onClick = "xmlhttpPost('%smain.py?FormID=AJAX_table', 'sortable', (getquerystring(this.form)))" % webqtlConfig.CGIDIR) - - containerTable.append(HT.TR(HT.TD(HT.Span(var1,var2,var3,customizerButton, "with", dbCustomizer, Class="bd1 cbddf fs11"), HT.BR(), HT.BR()), style="display:none;", Class="extra_options")) - - containerTable.append(HT.TR(HT.TD(xlsUrl, HT.BR(), HT.BR()))) - - pageTable.append(HT.TR(HT.TD(containerTable))) - - pageTable.append(HT.TR(HT.TD(div))) - - if self.species == 'human': - heatmap = "" - - form.append(HT.Input(name='ShowStrains',type='hidden', value =1), - HT.Input(name='ShowLine',type='hidden', value =1), - info, HT.BR(), pageTable, HT.BR()) - - TD_LR.append(corrHeading, form, HT.P()) - - - self.dict['body'] = str(TD_LR) - self.dict['title'] = 'Correlation' - # updated by NL. Delete function generateJavaScript, move js files to dhtml.js, webqtl.js and jqueryFunction.js - self.dict['js1'] = '' - self.dict['js2'] = 'onLoad="pageOffset()"' - self.dict['layer'] = self.generateWarningLayer() - else: - self.dict['body'] = "" - - -############################# -# # -# CorrelationPage Functions # -# # -############################# - - - def getSortByValue(self, calculationMethod): - - if calculationMethod == "1": - sortby = ("Sample p(r)", "up") - elif calculationMethod == "2": - sortby = ("Sample p(rho)", "up") - elif calculationMethod == "3": #XZ: literature correlation - sortby = ("Lit Corr","down") - elif calculationMethod == "4": #XZ: tissue correlation - sortby = ("Tissue r", "down") - elif calculationMethod == "5": - sortby = ("Tissue rho", "down") - - return sortby - - - - def generateWarningLayer(self): - - layerString = """ - - - - - """ - - return layerString - - - #XZ, 01/07/2009: In HTML code, the variable 'database' corresponds to the column 'Name' in database table. - def getFileName(self, target_db_name): ### dcrowell August 2008 - """Returns the name of the reference database file with which correlations are calculated. - Takes argument cursor which is a cursor object of any instance of a subclass of templatePage - Used by correlationPage""" - - query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name - self.cursor.execute(query) - result = self.cursor.fetchone() - Id = result[0] - FullName = result[1] - FullName = FullName.replace(' ','_') - FullName = FullName.replace('/','_') - - FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt' - - return FileName - - - #XZ, 01/29/2009: I modified this function. - #XZ: Note that the type of StrainIds must be number, not string. - def getStrainIds(self, species=None, strains=[]): - StrainIds = [] - for item in strains: - self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE - Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species)) - Id = self.cursor.fetchone()[0] - StrainIds.append(Id) - - return StrainIds - - - #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid - #XZ, 12/12/2008: if the input geneid is 'None', return 0 - #XZ, 12/12/2008: if the input geneid has no corresponding mouse geneid, return 0 - def translateToMouseGeneID (self, species, geneid): - mouse_geneid = 0; - - #if input geneid is None, return 0. - if not geneid: - return mouse_geneid - - if species == 'mouse': - mouse_geneid = geneid - elif species == 'rat': - self.cursor.execute( "SELECT mouse FROM GeneIDXRef WHERE rat=%d" % int(geneid) ) - record = self.cursor.fetchone() - if record: - mouse_geneid = record[0] - elif species == 'human': - self.cursor.execute( "SELECT mouse FROM GeneIDXRef WHERE human=%d" % int(geneid) ) - record = self.cursor.fetchone() - if record: - mouse_geneid = record[0] - - return mouse_geneid - - - #XZ, 12/16/2008: the input geneid is of mouse type - def checkForLitInfo(self,geneId): - q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId - self.cursor.execute(q) - try: - x = self.cursor.fetchone() - if x: return True - else: raise - except: return False - - - #XZ, 12/16/2008: the input geneid is of mouse type - def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): - q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) - self.cursor.execute(q) - try: - x = self.cursor.fetchone() - if x: return True - else: raise - except: return False - - - - def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId): - - StrainIds = [] - for item in strains: - self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species)) - Id = self.cursor.fetchone()[0] - StrainIds.append('%d' % Id) - - # break it into smaller chunks so we don't overload the MySql server - nnn = len(StrainIds) / 25 - if len(StrainIds) % 25: - nnn += 1 - oridata = [] - - #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId - tempTable = None - if GeneId and db.type == "ProbeSet": - if method == "3": - tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber) - - if method == "4" or method == "5": - tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber) - - for step in range(nnn): - temp = [] - StrainIdstep = StrainIds[step*25:min(len(StrainIds), (step+1)*25)] - for item in StrainIdstep: temp.append('T%s.value' % item) - - if db.type == "Publish": - query = "SELECT PublishXRef.Id, " - dataStartPos = 1 - query += string.join(temp,', ') - query += ' FROM (PublishXRef, PublishFreeze)' - #XZ, 03/04/2009: Xiaodong changed Data to PublishData - for item in StrainIdstep: - query += 'left join PublishData as T%s on T%s.Id = PublishXRef.DataId and T%s.StrainId=%s\n' %(item,item,item,item) - query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, ) - #XZ, 09/20/2008: extract literature correlation value together with gene expression values. - #XZ, 09/20/2008: notice the difference between the code in next block. - elif tempTable: - # we can get a little performance out of selecting our LitCorr here - # but also we need to do this because we are unconcerned with probes that have no geneId associated with them - # as we would not have litCorr data. - - if method == "3": - query = "SELECT %s.Name, %s.value," % (db.type,tempTable) - dataStartPos = 2 - if method == "4" or method == "5": - query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) - dataStartPos = 3 - - query += string.join(temp,', ') - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - if method == "3": - query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) - if method == "4" or method == "5": - query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) - for item in StrainIdstep: - query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) - - if method == "3": - query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - if method == "4" or method == "5": - query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - else: - query = "SELECT %s.Name," % db.type - dataStartPos = 1 - query += string.join(temp,', ') - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) - for item in StrainIdstep: - query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) - query += "WHERE %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - - self.cursor.execute(query) - results = self.cursor.fetchall() - oridata.append(results) - - datasize = len(oridata[0]) - traits = [] - # put all of the separate data together into a huge list of lists - for j in range(datasize): - traitdata = list(oridata[0][j]) - for i in range(1,nnn): - traitdata += list(oridata[i][j][dataStartPos:]) - - trait = Trait(traitdata[0], traitdata[dataStartPos:]) - - if method == METHOD_LIT: - trait.lit_corr = traitdata[1] - - if method in TISSUE_METHODS: - trait.tissue_corr = traitdata[1] - trait.p_tissue = traitdata[2] - - traits.append(trait) - - if tempTable: - self.cursor.execute( 'DROP TEMPORARY TABLE %s' % tempTable ) - - return traits - - - - - # XZ, 09/20/2008: This function creates TEMPORARY TABLE tmpTableName_2 and return its name. - # XZ, 09/20/2008: It stores top literature correlation values associated with the input geneId. - # XZ, 09/20/2008: Attention: In each row, the input geneId is always in column GeneId1. - #XZ, 12/16/2008: the input geneid can be of mouse, rat or human type - def getTempLiteratureTable(self, species, input_species_geneid, returnNumber): - # according to mysql the TEMPORARY TABLE name should not have to be unique because - # it is only available to the current connection. This program will be invoked via command line, but if it - # were to be invoked over mod_python this could cuase problems. mod_python will keep the connection alive - # in its executing threads ( i think) so there is a potential for the table not being dropped between users. - #XZ, 01/29/2009: To prevent the potential risk, I generate random table names and drop the tables after use them. - - - # the 'input_species_geneid' could be rat or human geneid, need to translate it to mouse geneid - translated_mouse_geneid = self.translateToMouseGeneID (species, input_species_geneid) - - tmpTableName_1 = webqtlUtil.genRandStr(prefix="LITERATURE") - - q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_1 - q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName_1, translated_mouse_geneid) - q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName_1, translated_mouse_geneid,translated_mouse_geneid) - for x in [q1,q2,q3]: self.cursor.execute(x) - - #XZ, 09/23/2008: Just use the top records insteard of using all records - tmpTableName_2 = webqtlUtil.genRandStr(prefix="TOPLITERATURE") - - q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_2 - self.cursor.execute(q1) - q2 = 'SELECT GeneId1, GeneId2, value FROM %s ORDER BY value DESC' % tmpTableName_1 - self.cursor.execute(q2) - result = self.cursor.fetchall() - - counter = 0 #this is to count how many records being inserted into table - for one_row in result: - mouse_geneid1, mouse_geneid2, lit_corr_alue = one_row - - #mouse_geneid1 has been tested before, now should test if mouse_geneid2 has corresponding geneid in other species - translated_species_geneid = 0 - if species == 'mouse': - translated_species_geneid = mouse_geneid2 - elif species == 'rat': - self.cursor.execute( "SELECT rat FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) ) - record = self.cursor.fetchone() - if record: - translated_species_geneid = record[0] - elif species == 'human': - self.cursor.execute( "SELECT human FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) ) - record = self.cursor.fetchone() - if record: - translated_species_geneid = record[0] - - if translated_species_geneid: - self.cursor.execute( 'INSERT INTO %s (GeneId1, GeneId2, value) VALUES (%d,%d,%f)' % (tmpTableName_2, int(input_species_geneid),int(translated_species_geneid), float(lit_corr_alue)) ) - counter = counter + 1 - - #pay attention to the number - if (counter > 2*returnNumber): - break - - self.cursor.execute('DROP TEMPORARY TABLE %s' % tmpTableName_1) - - return tmpTableName_2 - - - - #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2 - #XZ, 09/24/2008: Note that the correlation value can be negative. - def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0): - - def cmpTissCorrAbsoluteValue(A, B): - try: - if abs(A[1]) < abs(B[1]): return 1 - elif abs(A[1]) == abs(B[1]): - return 0 - else: return -1 - except: - return 0 - - symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method) - - symbolCorrList = symbolCorrDict.items() - - symbolCorrList.sort(cmpTissCorrAbsoluteValue) - symbolCorrList = symbolCorrList[0 : 2*returnNumber] - - tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE") - - q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName - self.cursor.execute(q1) - - for one_pair in symbolCorrList: - one_symbol = one_pair[0] - one_corr = one_pair[1] - one_p_value = symbolPvalueDict[one_symbol] - - self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) ) - - return tmpTableName - - - #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it. - def fetchLitCorrelations(self, species, GeneId, db, returnNumber): ### Used to generate Lit Correlations when calculations are done from text file. dcrowell August 2008 - """Uses getTempLiteratureTable to generate table of literatire correlations. This function then gathers that data and - pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance. - Returns a dictionary of 'TraitID':'LitCorr' for the requested correlation""" - - tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber) - - query = "SELECT %s.Name, %s.value" % (db.type,tempTable) - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) - query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable, db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - - self.cursor.execute(query) - results = self.cursor.fetchall() - - litCorrDict = {} - - for entry in results: - traitName,litcorr = entry - litCorrDict[traitName] = litcorr - - self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable) - - return litCorrDict - - - - #XZ, 01/09/2009: Xiaodong created this function. - def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0): - """Uses getTempTissueCorrTable to generate table of tissue correlations. This function then gathers that data and - pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance. - Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation""" - - - tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber) - - query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" % (tempTable, tempTable) - query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)' - query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) - query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable) - - self.cursor.execute(query) - results = self.cursor.fetchall() - - tissueCorrDict = {} - - for entry in results: - traitName, tissueCorr, tissuePValue = entry - tissueCorrDict[traitName] = (tissueCorr, tissuePValue) - - self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable) - - return tissueCorrDict - - - - #XZ, 01/13/2008 - def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None): - - tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE") - - q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName - q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName, input_trait_mouse_geneid) - q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName, input_trait_mouse_geneid, input_trait_mouse_geneid) - - for x in [q1,q2,q3]: - self.cursor.execute(x) - - for thisTrait in traitList: - try: - if thisTrait.geneid: - thisTrait.mouse_geneid = self.translateToMouseGeneID(species, thisTrait.geneid) - else: - thisTrait.mouse_geneid = 0 - except: - thisTrait.mouse_geneid = 0 - - if thisTrait.mouse_geneid and str(thisTrait.mouse_geneid).find(";") == -1: - try: - self.cursor.execute("SELECT value FROM %s WHERE GeneId2 = %s" % (tmpTableName, thisTrait.mouse_geneid)) - result = self.cursor.fetchone() - if result: - thisTrait.LCorr = result[0] - else: - thisTrait.LCorr = None - except: - thisTrait.LCorr = None - else: - thisTrait.LCorr = None - - self.cursor.execute("DROP TEMPORARY TABLE %s" % tmpTableName) - - return traitList - - def get_trait(self, cached, vals): - - if cached: - _log.info("Using the fast method because the file exists") - lit_corrs = {} - tissue_corrs = {} - use_lit = False - if self.method == METHOD_LIT: - lit_corrs = self.fetchLitCorrelations(species=self.species, GeneId=self.gene_id, db=self.db, returnNumber=self.returnNumber) - use_lit = True - - use_tissue_corr = False - if self.method in TISSUE_METHODS: - tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber) - use_tissue_corr = True - - DatabaseFileName = self.getFileName( target_db_name=self.target_db_name ) - datasetFile = open(webqtlConfig.TEXTDIR+DatabaseFileName,'r') - - #XZ, 01/08/2009: read the first line - line = datasetFile.readline() - cached_sample_names = webqtlUtil.readLineCSV(line)[1:] - - #XZ, 01/08/2009: This step is critical. It is necessary for this new method. - #XZ: The original function fetchAllDatabaseData uses all strains stored in variable _strains to - #XZ: retrieve the values of each strain from database in real time. - #XZ: The new method uses all strains stored in variable dataset_strains to create a new variable - #XZ: _newvals. _newvals has the same length as dataset_strains. The items in _newvals is in - #XZ: the same order of items in dataset_strains. The value of each item in _newvals is either - #XZ: the value of correspinding strain in _vals or 'None'. - new_vals = [] - for name in cached_sample_names: - if name in self.sample_names: - new_vals.append(float(vals[self.sample_names.index(name)])) - else: - new_vals.append('None') - - nnCorr = len(new_vals) - - #XZ, 01/14/2009: If literature corr or tissue corr is selected, - #XZ: there is no need to use parallel computing. - - traits = [] - data_start = 1 - for line in datasetFile: - raw_trait = webqtlUtil.readLineCSV(line) - trait = Trait.from_csv(raw_trait, data_start) - trait.lit_corr = lit_corrs.get(trait.name) - trait.tissue_corr, trait.p_tissue = tissue_corrs.get(trait.name, (None, None)) - traits.append(trait) - - return traits, new_vals - - else: - _log.info("Using the slow method for correlation") - - _log.info("Fetching from database") - traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) - _log.info("Done fetching from database") - totalTraits = len(traits) #XZ, 09/18/2008: total trait number - - return traits, vals - - - def do_parallel_correlation(self): - _log.info("Invoking parallel computing") - input_line_list = datasetFile.readlines() - _log.info("Read lines from the file") - all_line_number = len(input_line_list) - - step = 1000 - job_number = math.ceil( float(all_line_number)/step ) - - job_input_lists = [] - - _log.info("Configuring jobs") - - for job_index in range( int(job_number) ): - starti = job_index*step - endi = min((job_index+1)*step, all_line_number) - - one_job_input_list = [] - - for i in range( starti, endi ): - one_job_input_list.append( input_line_list[i] ) - - job_input_lists.append( one_job_input_list ) - - _log.info("Creating pp servers") - - ppservers = () - # Creates jobserver with automatically detected number of workers - job_server = pp.Server(ppservers=ppservers) - - _log.info("Done creating servers") - - jobs = [] - results = [] - - _log.info("Starting parallel computation, submitting jobs") - for one_job_input_list in job_input_lists: #pay attention to modules from outside - jobs.append( job_server.submit(func=compute_corr, args=(nnCorr, _newvals, one_job_input_list, self.method), depfuncs=(), modules=("utility.webqtlUtil",)) ) - _log.info("Done submitting jobs") - - for one_job in jobs: - one_result = one_job() - results.append( one_result ) - - _log.info("Acquiring results") - - for one_result in results: - for one_traitinfo in one_result: - allcorrelations.append( one_traitinfo ) - - _log.info("Appending the results") - - datasetFile.close() - totalTraits = len(allcorrelations) - _log.info("Done correlating using the fast method") - - - def correlate(self, vals): - - correlations = [] - - #XZ: Use the fast method only for probeset dataset, and this dataset must have been created. - #XZ: Otherwise, use original method - _log.info("Entering correlation") - - db_filename = self.getFileName( target_db_name=self.target_db_name ) - - cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR) - - # If the cache file exists, do a cached correlation for probeset data - if self.db.type == "ProbeSet": -# if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available: -# traits = do_parallel_correlation() -# -# else: - - (traits, vals) = self.get_trait(cache_available, vals) - - for trait in traits: - trait.calculate_correlation(vals, self.method) - - self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later - - #XZ, 3/31/2010: Theoretically, we should create one function 'comTissueCorr' - #to compare each trait by their tissue corr p values. - #But because the tissue corr p values are generated by permutation test, - #the top ones always have p value 0. So comparing p values actually does nothing. - #In addition, for the tissue data in our database, the N is always the same. - #So it's safe to compare with tissue corr statistic value. - #That's the same as literature corr. - #if self.method in [METHOD_LIT, METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] and self.gene_id: - # traits.sort(webqtlUtil.cmpLitCorr) - #else: - #if self.method in TISSUE_METHODS: - # sort(traits, key=lambda A: math.fabs(A.tissue_corr)) - #elif self.method == METHOD_LIT: - # traits.sort(traits, key=lambda A: math.fabs(A.lit_corr)) - #else: - traits = sortTraitCorrelations(traits, self.method) - - # Strip to the top N correlations - traits = traits[:min(self.returnNumber, len(traits))] - - addLiteratureCorr = False - addTissueCorr = False - - trait_list = [] - for trait in traits: - db_trait = webqtlTrait(db=self.db, name=trait.name, cursor=self.cursor) - db_trait.retrieveInfo( QTL='Yes' ) - - db_trait.Name = trait.name - db_trait.corr = trait.correlation - db_trait.nOverlap = trait.overlap - db_trait.corrPValue = trait.p_value - - # NL, 07/19/2010 - # js function changed, add a new parameter rankOrder for js function 'showTissueCorrPlot' - db_trait.RANK_ORDER = self.RANK_ORDERS[self.method] - - #XZ, 26/09/2008: Method is 4 or 5. Have fetched tissue corr, but no literature correlation yet. - if self.method in TISSUE_METHODS: - db_trait.tissueCorr = trait.tissue_corr - db_trait.tissuePValue = trait.p_tissue - addTissueCorr = True - - - #XZ, 26/09/2008: Method is 3, Have fetched literature corr, but no tissue corr yet. - elif self.method == METHOD_LIT: - db_trait.LCorr = trait.lit_corr - db_trait.mouse_geneid = self.translateToMouseGeneID(self.species, db_trait.geneid) - addLiteratureCorr = True - - #XZ, 26/09/2008: Method is 1 or 2. Have NOT fetched literature corr and tissue corr yet. - # Phenotype data will not have geneid, and neither will some probes - # we need to handle this because we will get an attribute error - else: - if self.input_trait_mouse_gene_id and self.db.type=="ProbeSet": - addLiteratureCorr = True - if self.trait_symbol and self.db.type=="ProbeSet": - addTissueCorr = True - - trait_list.append(db_trait) - - if addLiteratureCorr: - trait_list = self.getLiteratureCorrelationByList(self.input_trait_mouse_gene_id, - self.species, trait_list) - if addTissueCorr: - trait_list = self.getTissueCorrelationByList( - primaryTraitSymbol = self.trait_symbol, - traitList = trait_list, - TissueProbeSetFreezeId = TISSUE_MOUSE_DB, - method=self.method) - - return trait_list - - - def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None): - - symbolCorrDict = {} - symbolPvalueDict = {} - - primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) - primaryTraitValue = primaryTraitSymbolValueDict.values()[0] - - SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) - - if method in ["2","5"]: - symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman') - else: - symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict) - - - return (symbolCorrDict, symbolPvalueDict) - - - - #XZ, 10/13/2010 - def getTissueCorrelationByList(self, primaryTraitSymbol=None, traitList=None, TissueProbeSetFreezeId=None, method=None): - - primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) - - if primaryTraitSymbol.lower() in primaryTraitSymbolValueDict: - primaryTraitValue = primaryTraitSymbolValueDict[primaryTraitSymbol.lower()] - - geneSymbolList = [] - - for thisTrait in traitList: - if hasattr(thisTrait, 'symbol'): - geneSymbolList.append(thisTrait.symbol) - - SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=geneSymbolList, TissueProbeSetFreezeId=TISSUE_MOUSE_DB) - - for thisTrait in traitList: - if hasattr(thisTrait, 'symbol') and thisTrait.symbol and thisTrait.symbol.lower() in SymbolValueDict: - oneTraitValue = SymbolValueDict[thisTrait.symbol.lower()] - if method in ["2","5"]: - result = correlationFunction.calZeroOrderCorrForTiss( primaryTraitValue, oneTraitValue, method='spearman' ) - else: - result = correlationFunction.calZeroOrderCorrForTiss( primaryTraitValue, oneTraitValue) - thisTrait.tissueCorr = result[0] - thisTrait.tissuePValue = result[2] - else: - thisTrait.tissueCorr = None - thisTrait.tissuePValue = None - else: - for thisTrait in traitList: - thisTrait.tissueCorr = None - thisTrait.tissuePValue = None - - return traitList - - - def getTopInfo(self, myTrait=None, method=None, db=None, target_db_name=None, returnNumber=None, methodDict=None, totalTraits=None, identification=None ): - - if myTrait: - if method in ["1","2"]: #genetic correlation - info = HT.Paragraph("Values of Record %s in the " % myTrait.getGivenName(), HT.Href(text=myTrait.db.fullname,url=webqtlConfig.INFOPAGEHREF % myTrait.db.name,target="_blank", Class="fwn"), - " database were compared to all %d records in the " % self.record_count, HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank", Class="fwn"), - ' database. The top %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.') - else: - #myTrait.retrieveInfo()#need to know geneid and symbol - if method == "3":#literature correlation - searchDBName = "Literature Correlation" - searchDBLink = "/correlationAnnotation.html#literatureCorr" - else: #tissue correlation - searchDBName = "Tissue Correlation" - searchDBLink = "/correlationAnnotation.html#tissueCorr" - info = HT.Paragraph("Your input record %s in the " % myTrait.getGivenName(), HT.Href(text=myTrait.db.fullname,url=webqtlConfig.INFOPAGEHREF % myTrait.db.name,target="_blank", Class="fwn"), - " database corresponds to ", - HT.Href(text='gene Id %s, and gene symbol %s' % (myTrait.geneid, myTrait.symbol), target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % myTrait.geneid, Class="fs12 fwn"), - '. GN ranked all genes in the ', HT.Href(text=searchDBName,url=searchDBLink,target="_blank", Class="fwn"),' database by the %s.' % methodDict[method], - ' The top %d probes or probesets in the ' % returnNumber, HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank", Class="fwn"), - ' database corresponding to the top genes ranked by the %s are displayed.' %( methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.' ) - - elif identification: - info = HT.Paragraph('Values of %s were compared to all %d traits in ' % (identification, self.record_count), - HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank",Class="fwn"), - ' database. The TOP %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.') - - else: - info = HT.Paragraph('Trait values were compared to all values in ', - HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank",Class="fwn"), - ' database. The TOP %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.') - - if db.type=="Geno": - info.append(HT.BR(),HT.BR(),'Clicking on the Locus will open the genotypes data for that locus. Click on the correlation to see a scatter plot of the trait data.') - elif db.type=="Publish": - info.append(HT.BR(),HT.BR(),'Clicking on the record ID will open the published phenotype data for that publication. Click on the correlation to see a scatter plot of the trait data. ') - elif db.type=="ProbeSet": - info.append(HT.BR(),'Click the correlation values to generate scatter plots. Select the Record ID to open the Trait Data and Analysis form. Select the symbol to open NCBI Entrez.') - else: - pass - - - return info - - - def createExcelFileWithTitleAndFooter(self, workbook=None, identification=None, db=None, returnNumber=None): - - worksheet = workbook.add_worksheet() - - titleStyle = workbook.add_format(align = 'left', bold = 0, size=14, border = 1, border_color="gray") - - ##Write title Info - # Modified by Hongqiang Li - worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle) - worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle) - worksheet.write([2, 0], "Trait : %s" % identification, titleStyle) - worksheet.write([3, 0], "Database : %s" % db.fullname, titleStyle) - worksheet.write([4, 0], "Date : %s" % time.strftime("%B %d, %Y", time.gmtime()), titleStyle) - worksheet.write([5, 0], "Time : %s GMT" % time.strftime("%H:%M ", time.gmtime()), titleStyle) - worksheet.write([6, 0], "Status of data ownership: Possibly unpublished data; please see %s/statusandContact.html for details on sources, ownership, and usage of these data." % webqtlConfig.PORTADDR, titleStyle) - #Write footer info - worksheet.write([9 + returnNumber, 0], "Funding for The GeneNetwork: NIAAA (U01AA13499, U24AA13513), NIDA, NIMH, and NIAAA (P20-DA21131), NCI MMHCC (U01CA105417), and NCRR (U01NR 105417)", titleStyle) - worksheet.write([10 + returnNumber, 0], "PLEASE RETAIN DATA SOURCE INFORMATION WHENEVER POSSIBLE", titleStyle) - - return worksheet - - - def getTableHeaderForGeno(self, method=None, worksheet=None, newrow=None, headingStyle=None): - - tblobj_header = [] - - if method in ["1","3","4"]: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb"), sort=0), - THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Record ID', idx=1), - THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Location (Chr and Mb)', idx=2), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=3), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=4), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=5)]] - - for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)', 'Sample r', 'N Cases', 'Sample p(r)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - else: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb"), sort=0), - THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Record ID', idx=1), - THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Location (Chr and Mb)', idx=2), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=3), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=4), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=5)]] - - for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)', 'Sample rho', 'N Cases', 'Sample p(rho)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - - - return tblobj_header, worksheet - - - def getTableBodyForGeno(self, traitList, formName=None, worksheet=None, newrow=None, corrScript=None): - - tblobj_body = [] - - for thisTrait in traitList: - tr = [] - - trId = str(thisTrait) - - corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) - - tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) - - tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"),align="left", Class="fs12 fwn ffl b1 c222"), text=thisTrait.name, val=thisTrait.name.upper())) - - #XZ: trait_location_value is used for sorting - trait_location_repr = '--' - trait_location_value = 1000000 - - if thisTrait.chr and thisTrait.mb: - try: - trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb - except: - if thisTrait.chr.upper() == 'X': - trait_location_value = 20*1000 + thisTrait.mb - else: - trait_location_value = ord(str(thisTrait.chr).upper()[0])*1000 + thisTrait.mb - - trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) ) - - tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value)) - - - repr='%3.3f' % thisTrait.corr - tr.append(TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlot('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"), Class="fs12 fwn ffl b1 c222", nowrap='ON', align='right'),repr,abs(thisTrait.corr))) - - repr = '%d' % thisTrait.nOverlap - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222",align='right'),repr,thisTrait.nOverlap)) - - repr = webqtlUtil.SciFloat(thisTrait.corrPValue) - tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) - - tblobj_body.append(tr) - - for ncol, item in enumerate([thisTrait.name, trait_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue]): - worksheet.write([newrow, ncol], item) - newrow += 1 - - return tblobj_body, worksheet, corrScript - - - def getTableHeaderForPublish(self, method=None, worksheet=None, newrow=None, headingStyle=None): - - tblobj_header = [] - - if method in ["1","3","4"]: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Record ID", idx=1), - THCell(HT.TD('Phenotype', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Phenotype", idx=2), - THCell(HT.TD('Authors', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Authors", idx=3), - THCell(HT.TD('Year', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Year", idx=4), - THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS", idx=5), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS Location", idx=6), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=7), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=8), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=9)]] - - for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)", "Sample r", "N Cases", "Sample p(r)"]): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - else: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Record ID", idx=1), - THCell(HT.TD('Phenotype', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Phenotype", idx=2), - THCell(HT.TD('Authors', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Authors", idx=3), - THCell(HT.TD('Year', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Year", idx=4), - THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS", idx=5), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS Location", idx=6), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=7), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=8), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=9)]] - - for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)", "Sample rho", "N Cases", "Sample p(rho)"]): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - - - return tblobj_header, worksheet - - - def getTableBodyForPublish(self, traitList, formName=None, worksheet=None, newrow=None, corrScript=None, species=''): - - tblobj_body = [] - - for thisTrait in traitList: - tr = [] - - trId = str(thisTrait) - - corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) - - tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) - - tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn"), nowrap="yes",align="center", Class="fs12 fwn b1 c222"),str(thisTrait.name), thisTrait.name)) - - PhenotypeString = thisTrait.post_publication_description - if thisTrait.confidential: - if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=thisTrait.authorized_users): - PhenotypeString = thisTrait.pre_publication_description - - tr.append(TDCell(HT.TD(PhenotypeString, Class="fs12 fwn b1 c222"), PhenotypeString, PhenotypeString.upper())) - - tr.append(TDCell(HT.TD(thisTrait.authors, Class="fs12 fwn b1 c222 fsI"),thisTrait.authors, thisTrait.authors.strip().upper())) - - try: - PubMedLinkText = myear = repr = int(thisTrait.year) - except: - PubMedLinkText = repr = "--" - myear = 0 - if thisTrait.pubmed_id: - PubMedLink = HT.Href(text= repr,url= webqtlConfig.PUBMEDLINK_URL % thisTrait.pubmed_id,target='_blank', Class="fs12 fwn") - else: - PubMedLink = repr - - tr.append(TDCell(HT.TD(PubMedLink, Class="fs12 fwn b1 c222", align='center'), repr, myear)) - - #LRS and its location - LRS_score_repr = '--' - LRS_score_value = 0 - LRS_location_repr = '--' - LRS_location_value = 1000000 - LRS_flag = 1 - - #Max LRS and its Locus location - if thisTrait.lrs and thisTrait.locus: - self.cursor.execute(""" - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, thisTrait.locus)) - result = self.cursor.fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - - - LRS_score_repr = '%3.1f' % thisTrait.lrs - LRS_score_value = thisTrait.lrs - LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) ) - LRS_flag = 0 - - #tr.append(TDCell(HT.TD(HT.Href(text=LRS_score_repr,url="javascript:showIntervalMapping('%s', '%s : %s')" % (formName, thisTrait.db.shortname, thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn ffl b1 c222", align='right', nowrap="on"),LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222", align='right', nowrap="on"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) - - if LRS_flag: - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) - - repr = '%3.4f' % thisTrait.corr - tr.append(TDCell(HT.TD(HT.Href(text=repr,url="javascript:showCorrPlot('%s', '%s')" % (formName,thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn b1 c222", align='right',nowrap="on"), repr, abs(thisTrait.corr))) - - repr = '%d' % thisTrait.nOverlap - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.nOverlap)) - - repr = webqtlUtil.SciFloat(thisTrait.corrPValue) - tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) - - tblobj_body.append(tr) - - for ncol, item in enumerate([thisTrait.name, PhenotypeString, thisTrait.authors, thisTrait.year, thisTrait.pubmed_id, LRS_score_repr, LRS_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue]): - worksheet.write([newrow, ncol], item) - newrow += 1 - - return tblobj_body, worksheet, corrScript - - - def getTableHeaderForProbeSet(self, method=None, worksheet=None, newrow=None, headingStyle=None): - - tblobj_header = [] - - if method in ["1","3","4"]: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Record ID", idx=1), - THCell(HT.TD('Gene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Gene ID", idx=2), - THCell(HT.TD('Homologene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Homologene ID", idx=3), - THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Symbol", idx=4), - THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Description", idx=5), - THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Location (Chr: Mb)", idx=6), - THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Mean Expr", idx=7), - THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS", idx=8), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS Location (Chr: Mb)", idx=9), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=10), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=11), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=12), - THCell(HT.TD(HT.Href( - text = HT.Span('Lit',HT.BR(), 'Corr', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#literatureCorr"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Lit Corr", idx=13), - #XZ, 09/22/2008: tissue correlation - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue r", idx=14), - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue p(r)", idx=15)]] - - for ncol, item in enumerate(['Record', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr: Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)', 'Sample r', 'N Cases', 'Sample p(r)', 'Lit Corr', 'Tissue r', 'Tissue p(r)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - else: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Record ID", idx=1), - THCell(HT.TD('Gene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Gene ID", idx=2), - THCell(HT.TD('Homologene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Homologene ID", idx=3), - THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Symbol", idx=4), - THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Description", idx=5), - THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Location (Chr: Mb)", idx=6), - THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Mean Expr", idx=7), - THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS", idx=8), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS Location (Chr: Mb)", idx=9), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=10), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=11), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=12), - THCell(HT.TD(HT.Href( - text = HT.Span('Lit',HT.BR(), 'Corr', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#literatureCorr"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Lit Corr", idx=13), - #XZ, 09/22/2008: tissue correlation - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue rho", idx=14), - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue p(rho)", idx=15)]] - - for ncol, item in enumerate(['Record ID', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr: Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)', 'Sample rho', 'N Cases', 'Sample p(rho)', 'Lit Corr', 'Tissue rho', 'Tissue p(rho)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - - return tblobj_header, worksheet - - - def getTableBodyForProbeSet(self, traitList=[], primaryTrait=None, formName=None, worksheet=None, newrow=None, corrScript=None, species=''): - - tblobj_body = [] - - for thisTrait in traitList: - - if thisTrait.symbol: - pass - else: - thisTrait.symbol = "--" - - if thisTrait.geneid: - symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % thisTrait.geneid, Class="fs12 fwn") - else: - symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&DB=gene&term=%s" % thisTrait.symbol, Class="fs12 fwn") - - tr = [] - - trId = str(thisTrait) - - corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) - - #XZ, 12/08/2008: checkbox - tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) - - #XZ, 12/08/2008: probeset name - tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName,thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn b1 c222"), thisTrait.name, thisTrait.name.upper())) - - #XZ, 12/08/2008: gene id - if thisTrait.geneid: - tr.append(TDCell(None, thisTrait.geneid, val=999)) - else: - tr.append(TDCell(None, thisTrait.geneid, val=999)) - - #XZ, 12/08/2008: homologene id - if thisTrait.homologeneid: - tr.append(TDCell("", thisTrait.homologeneid, val=999)) - else: - tr.append(TDCell("", thisTrait.homologeneid, val=999)) - - #XZ, 12/08/2008: gene symbol - tr.append(TDCell(HT.TD(symbolurl, Class="fs12 fwn b1 c222 fsI"),thisTrait.symbol, thisTrait.symbol.upper())) - - #XZ, 12/08/2008: description - #XZ, 06/05/2009: Rob asked to add probe target description - description_string = str(thisTrait.description).strip() - target_string = str(thisTrait.probe_target_description).strip() - - description_display = '' - - if len(description_string) > 1 and description_string != 'None': - description_display = description_string - else: - description_display = thisTrait.symbol - - if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': - description_display = description_display + '; ' + target_string.strip() - - tr.append(TDCell(HT.TD(description_display, Class="fs12 fwn b1 c222"), description_display, description_display)) - - #XZ: trait_location_value is used for sorting - trait_location_repr = '--' - trait_location_value = 1000000 - - if thisTrait.chr and thisTrait.mb: - try: - trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb - except: - if thisTrait.chr.upper() == 'X': - trait_location_value = 20*1000 + thisTrait.mb - else: - trait_location_value = ord(str(thisTrait.chr).upper()[0])*1000 + thisTrait.mb - - trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) ) - - tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value)) - - """ - #XZ, 12/08/2008: chromosome number - #XZ, 12/10/2008: use Mbvalue to sort chromosome - tr.append(TDCell( HT.TD(thisTrait.chr, Class="fs12 fwn b1 c222", align='right'), thisTrait.chr, Mbvalue) ) - - #XZ, 12/08/2008: Rob wants 6 digit precision, and we have to deal with that the mb could be None - if not thisTrait.mb: - tr.append(TDCell(HT.TD(thisTrait.mb, Class="fs12 fwn b1 c222",align='right'), thisTrait.mb, Mbvalue)) - else: - tr.append(TDCell(HT.TD('%.6f' % thisTrait.mb, Class="fs12 fwn b1 c222", align='right'), thisTrait.mb, Mbvalue)) - """ - - - - #XZ, 01/12/08: This SQL query is much faster. - self.cursor.execute(""" - select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %d and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' - """ % (thisTrait.db.id, thisTrait.name)) - result = self.cursor.fetchone() - if result: - if result[0]: - mean = result[0] - else: - mean=0 - else: - mean = 0 - - #XZ, 06/05/2009: It is neccessary to turn on nowrap - repr = "%2.3f" % mean - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right', nowrap='ON'),repr, mean)) - - #LRS and its location - LRS_score_repr = '--' - LRS_score_value = 0 - LRS_location_repr = '--' - LRS_location_value = 1000000 - LRS_flag = 1 - - #Max LRS and its Locus location - if thisTrait.lrs and thisTrait.locus: - self.cursor.execute(""" - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, thisTrait.locus)) - result = self.cursor.fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - - - LRS_score_repr = '%3.1f' % thisTrait.lrs - LRS_score_value = thisTrait.lrs - LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) ) - LRS_flag = 0 - - #tr.append(TDCell(HT.TD(HT.Href(text=LRS_score_repr,url="javascript:showIntervalMapping('%s', '%s : %s')" % (formName, thisTrait.db.shortname, thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn ffl b1 c222", align='right', nowrap="on"),LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222", align='right', nowrap="on"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), LRS_location_repr, LRS_location_value)) - - if LRS_flag: - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) - - - #XZ, 12/08/2008: generic correlation - repr='%3.3f' % thisTrait.corr - tr.append(TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlot('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"), Class="fs12 fwn ffl b1 c222", align='right'),repr,abs(thisTrait.corr))) - - #XZ, 12/08/2008: number of overlaped cases - repr = '%d' % thisTrait.nOverlap - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.nOverlap)) - - #XZ, 12/08/2008: p value of genetic correlation - repr = webqtlUtil.SciFloat(thisTrait.corrPValue) - tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) - - #XZ, 12/08/2008: literature correlation - LCorr = 0.0 - LCorrStr = "--" - if hasattr(thisTrait, 'LCorr') and thisTrait.LCorr: - LCorr = thisTrait.LCorr - LCorrStr = "%2.3f" % thisTrait.LCorr - tr.append(TDCell(HT.TD(LCorrStr, Class="fs12 fwn b1 c222", align='right'), LCorrStr, abs(LCorr))) - - #XZ, 09/22/2008: tissue correlation. - TCorr = 0.0 - TCorrStr = "--" - #XZ, 11/20/2008: need to pass two geneids: input_trait_mouse_geneid and thisTrait.mouse_geneid - if hasattr(thisTrait, 'tissueCorr') and thisTrait.tissueCorr: - TCorr = thisTrait.tissueCorr - TCorrStr = "%2.3f" % thisTrait.tissueCorr - # NL, 07/19/2010: add a new parameter rankOrder for js function 'showTissueCorrPlot' - rankOrder = self.RANK_ORDERS[self.method] - TCorrPlotURL = "javascript:showTissueCorrPlot('%s','%s','%s',%d)" %(formName, primaryTrait.symbol, thisTrait.symbol,rankOrder) - tr.append(TDCell(HT.TD(HT.Href(text=TCorrStr, url=TCorrPlotURL, Class="fs12 fwn ff1"), Class="fs12 fwn ff1 b1 c222", align='right'), TCorrStr, abs(TCorr))) - else: - tr.append(TDCell(HT.TD(TCorrStr, Class="fs12 fwn b1 c222", align='right'), TCorrStr, abs(TCorr))) - - #XZ, 12/08/2008: p value of tissue correlation - TPValue = 1.0 - TPValueStr = "--" - if hasattr(thisTrait, 'tissueCorr') and thisTrait.tissuePValue: #XZ, 09/22/2008: thisTrait.tissuePValue can't be used here because it could be 0 - TPValue = thisTrait.tissuePValue - TPValueStr = "%2.3f" % thisTrait.tissuePValue - tr.append(TDCell(HT.TD(TPValueStr, Class="fs12 fwn b1 c222", align='right'), TPValueStr, TPValue)) - - tblobj_body.append(tr) - - for ncol, item in enumerate([thisTrait.name, thisTrait.geneid, thisTrait.homologeneid, thisTrait.symbol, thisTrait.description, trait_location_repr, mean, LRS_score_repr, LRS_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue, LCorr, TCorr, TPValue]): - worksheet.write([newrow, ncol], item) - - newrow += 1 - - return tblobj_body, worksheet, corrScript diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py new file mode 100644 index 00000000..23dd1534 --- /dev/null +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -0,0 +1,2107 @@ +## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2010/08/10 +# +# Last updated by NL 2011/02/11 +# Last updated by Christian Fernandez 2012/04/07 +# Refactored correlation calculation into smaller functions in preparation of +# separating html from existing code + +from __future__ import absolute_import, print_function, division + +import string +from math import * +import cPickle +import os +import time +#import pyXLWriter as xl +import pp +import math + +from pprint import pformat as pf + +from htmlgen import HTMLgen2 as HT +import reaper + +from base import webqtlConfig +from utility.THCell import THCell +from utility.TDCell import TDCell +from base.webqtlTrait import GeneralTrait +from base import data_set +from base.templatePage import templatePage +from utility import webqtlUtil, helper_functions +from dbFunction import webqtlDatabaseFunction +import utility.webqtlUtil #this is for parallel computing only. +from wqflask.correlation import correlationFunction + + +METHOD_SAMPLE_PEARSON = "1" +METHOD_SAMPLE_RANK = "2" +METHOD_LIT = "3" +METHOD_TISSUE_PEARSON = "4" +METHOD_TISSUE_RANK = "5" + +TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] + +TISSUE_MOUSE_DB = 1 + +class AuthException(Exception): pass + + +class Trait(object): + + + def __init__(self, name, raw_values = None, lit_corr = None, tissue_corr = None, p_tissue = None): + self.name = name + self.raw_values = raw_values + self.lit_corr = lit_corr + self.tissue_corr = tissue_corr + self.p_tissue = p_tissue + self.correlation = 0 + self.p_value = 0 + + @staticmethod + def from_csv(line, data_start = 1): + name = line[0] + numbers = line[data_start:] + # _log.info(numbers) + numbers = [ float(number) for number in numbers ] + + return Trait(name, raw_values = numbers) + + def calculate_correlation(self, values, method): + """Calculate the correlation value and p value according to the method specified""" + + #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for + #There's probably a better way of dealing with this, but I'll have to ask Christian + updated_raw_values = [] + updated_values = [] + for i in range(len(values)): + if values[i] != "None": + updated_raw_values.append(self.raw_values[i]) + updated_values.append(values[i]) + + self.raw_values = updated_raw_values + values = updated_values + + if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON: + corr,nOverlap = webqtlUtil.calCorrelation(self.raw_values, values, len(values)) + else: + corr,nOverlap = webqtlUtil.calCorrelationRank(self.raw_values, values, len(values)) + + self.correlation = corr + self.overlap = nOverlap + + if self.overlap < 3: + self.p_value = 1.0 + else: + #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to br prevented. + if abs(self.correlation) >= 1.0: + self.p_value = 0.0 + else: + #Confirm that this division works after future import + ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation)) + ZValue = ZValue*sqrt(self.overlap-3) + self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) + + + +#XZ, 01/14/2009: This method is for parallel computing only. +#XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1) +#XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected +def compute_corr(input_nnCorr, input_trait, input_list, computing_method): + + allcorrelations = [] + for line in input_list: + tokens = line.split('","') + tokens[-1] = tokens[-1][:-2] #remove the last " + tokens[0] = tokens[0][1:] #remove the first " + + traitdataName = tokens[0] + database_trait = tokens[1:] + + if computing_method == "1": #XZ: Pearson's r + corr,nOverlap = utility.webqtlUtil.calCorrelationText(input_trait, database_trait, input_nnCorr) + else: #XZ: Spearman's rho + corr,nOverlap = utility.webqtlUtil.calCorrelationRankText(input_trait, database_trait, input_nnCorr) + traitinfo = [traitdataName,corr,nOverlap] + allcorrelations.append(traitinfo) + + return allcorrelations + +def get_correlation_method_key(form_data): + #XZ, 09/28/2008: if user select "1", then display 1, 3 and 4. + #XZ, 09/28/2008: if user select "2", then display 2, 3 and 5. + #XZ, 09/28/2008: if user select "3", then display 1, 3 and 4. + #XZ, 09/28/2008: if user select "4", then display 1, 3 and 4. + #XZ, 09/28/2008: if user select "5", then display 2, 3 and 5. + + method = form_data.method + if method not in ["1", "2", "3" ,"4", "5"]: + return "1" + + return method + + +def get_custom_trait(form_data, cursor): + """Pulls the custom trait, if it exists, out of the form data""" + trait_name = form_data.fullname + + if trait_name: + trait = webqtlTrait(fullname=trait_name, cursor=cursor) + trait.retrieveInfo() + return trait + else: + return None + + +#XZ, 09/18/2008: get the information such as value, variance of the input strain names from the form. +def get_sample_data(fd): + #print("fd is:", pf(fd.__dict__)) + if fd.allstrainlist: + mdpchoice = fd.MDPChoice + #XZ, in HTML source code, it is "BXD Only", "BXH Only", and so on + if mdpchoice == "1": + strainlist = fd.f1list + fd.strainlist + #XZ, in HTML source code, it is "Non-BXD Only", "Non-BXD Only", etc + elif mdpchoice == "2": + strainlist = [] + strainlist2 = fd.f1list + fd.strainlist + for strain in fd.allstrainlist: + if strain not in strainlist2: + strainlist.append(strain) + #So called MDP Panel + if strainlist: + strainlist = fd.f1list + fd.parlist+strainlist + #XZ, in HTML source code, it is "All Cases" + else: + strainlist = fd.allstrainlist + #XZ, 09/18/2008: put the trait data into dictionary fd.allTraitData + fd.readData(fd.allstrainlist) + else: + mdpchoice = None + strainlist = fd.strainlist + #XZ, 09/18/2008: put the trait data into dictionary fd.allTraitData + fd.readData() + + return strainlist + + + +def get_species(fd, cursor): + #XZ, 3/16/2010: variable RISet must be pass by the form + RISet = fd.RISet + #XZ, 12/12/2008: get species infomation + species = webqtlDatabaseFunction.retrieveSpecies(cursor=cursor, RISet=RISet) + return species + + +def sortTraitCorrelations(traits, method="1"): + if method in TISSUE_METHODS: + traits.sort(key=lambda trait: trait.tissue_corr != None and abs(trait.tissue_corr), reverse=True) + elif method == METHOD_LIT: + traits.sort(key=lambda trait: trait.lit_corr != None and abs(trait.lit_corr), reverse=True) + else: + traits.sort(key=lambda trait: trait.correlation != None and abs(trait.correlation), reverse=True) + + return traits + + +def auth_user_for_db(db, cursor, target_db_name, privilege, username): + """Authorize a user for access to a database if that database is + confidential. A db (identified by a record in ProbeSetFreeze) contains a + list of authorized users who may access it, as well as its confidentiality + level. + + If the current user's privilege level is greater than 'user', ie: root or + admin, then they are automatically authed, otherwise, check the + AuthorizedUsers field for the presence of their name.""" + + if db.type == 'ProbeSet': + cursor.execute('SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name) + indId, indName, indFullName, confidential, AuthorisedUsers = cursor.fetchall()[0] + + if confidential: + authorized = 0 + + #for the dataset that confidentiality is 1 + #1. 'admin' and 'root' can see all of the dataset + #2. 'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table) + if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: + authorized = 1 + else: + if username in AuthorisedUsers.split(","): + authorized = 1 + + if not authorized: + raise AuthException("The %s database you selected is not open to the public at this time, please go back and select other database." % indFullName) + + +class CorrelationResults(object): + + corr_min_informative = 4 + + #PAGE_HEADING = "Correlation Table" + #CORRELATION_METHODS = {"1" : "Genetic Correlation (Pearson's r)", + # "2" : "Genetic Correlation (Spearman's rho)", + # "3" : "SGO Literature Correlation", + # "4" : "Tissue Correlation (Pearson's r)", + # "5" : "Tissue Correlation (Spearman's rho)"} + # + #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1} + + + #def error(self, message, *args, **kw): + # heading = heading or self.PAGE_HEADING + # return templatePage.error(heading = heading, detail = [message], error=error) + + def __init__(self, start_vars): + #self.dataset = create_dataset(start_vars['dataset_name']) + #self.dataset.group.read_genotype_file() + #self.genotype = self.dataset.group.genotype + # + #self.this_trait = GeneralTrait(dataset=self.dataset.name, + # name=start_vars['trait_id'], + # cellid=None) + + helper_functions.get_dataset_and_trait(self, start_vars) + + self.samples = [] # Want only ones with values + self.vals = [] + self.variances = [] + + corr_samples_group = start_vars['corr_samples_group'] + if corr_samples_group != 'samples_other': + self.process_samples(start_vars, self.dataset.group.samplelist, ()) + #for sample in self.dataset.group.samplelist: + # value = start_vars['value:' + sample] + # variance = start_vars['variance:' + sample] + # if variance.strip().lower() == 'x': + # variance = 0 + # else: + # variance = float(variance) + # if value.strip().lower() != 'x': + # self.samples.append(str(sample)) + # self.vals.append(float(value)) + # self.variances.append(variance) + + if corr_samples_group != 'samples_primary': + primary_samples = (self.dataset.group.parlist + + self.dataset.group.f1list + + self.dataset.group.samplelist) + self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) + #for sample in self.this_trait.data.keys(): + # if sample not in primary_samples: + # value = start_vars['value:' + sample] + # variance = start_vars['variance:' + sample] + # if variance.strip().lower() == 'x': + # variance = 0 + # else: + # variance = float(variance) + # if value.strip().lower() != 'x': + # self.samples.append(str(sample)) + # self.vals.append(float(value)) + # self.variances.append(variance) + + print("self.samples is:", pf(self.samples)) + + #sample_list = get_sample_data(fd) + #print("sample_list is", pf(sample_list)) + + #XZ, 09/18/2008: get all information about the user selected database. + #target_db_name = fd.corr_dataset + self.target_db_name = start_vars['corr_dataset'] + + # Zach said this is ok + # Auth if needed + #try: + # auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName) + #except AuthException as e: + # detail = [e.message] + # return self.error(detail) + + #XZ, 09/18/2008: filter out the strains that have no value. + #self.sample_names, vals, vars, N = fd.informativeStrains(sample_list) + + #print("samplenames is:", pf(self.sample_names)) + #CF - If less than a minimum number of strains/cases in common, don't calculate anything + #if len(self.sample_names) < self.corr_min_informative: + # detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)] + # self.error(heading=None, detail=detail) + + #correlation_method = self.CORRELATION_METHODS[self.method] + #rankOrder = self.RANK_ORDERS[self.method] + + # CF - Number of results returned + # Todo: Get rid of self.returnNumber + + #self.record_count = 0 + + #myTrait = get_custom_trait(fd, self.cursor) + + + # We will not get Literature Correlations if there is no GeneId because there is nothing + # to look against + self.geneid = self.this_trait.geneid + + # We will not get Tissue Correlations if there is no gene symbol because there is nothing to look against + #self.trait_symbol = myTrait.symbol + + + #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid + self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.dataset.group.species, self.geneid) + + #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' + self.tissue_probeset_freeze_id = 1 + + traitList = self.correlate(self.vals) + + _log.info("Done doing correlation calculation") + +############################################################################################################################################ + + TD_LR = HT.TD(height=200,width="100%",bgColor='#eeeeee') + + mainfmName = webqtlUtil.genRandStr("fm_") + form = HT.Form(cgi = os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), + enctype='multipart/form-data', name= mainfmName, submit=HT.Input(type='hidden')) + hddn = {'FormID': 'showDatabase', + 'ProbeSetID': '_', + 'database': self.target_db_name, + 'databaseFull': self.db.fullname, + 'CellID': '_', + 'RISet': fd.RISet, + 'identification': fd.identification} + + if myTrait: + hddn['fullname'] = fd.fullname + if mdp_choice: + hddn['MDPChoice']= mdp_choice + + + #XZ, 09/18/2008: pass the trait data to next page by hidden parameters. + webqtlUtil.exportData(hddn, fd.allTraitData) + + if fd.incparentsf1: + hddn['incparentsf1']='ON' + + if fd.allstrainlist: + hddn['allstrainlist'] = string.join(fd.allstrainlist, ' ') + + + for key in hddn.keys(): + form.append(HT.Input(name=key, value=hddn[key], type='hidden')) + + #XZ, 11/21/2008: add two parameters to form + form.append(HT.Input(name="X_geneSymbol", value="", type='hidden')) + form.append(HT.Input(name="Y_geneSymbol", value="", type='hidden')) + + #XZ, 3/11/2010: add one parameter to record if the method is rank order. + form.append(HT.Input(name="rankOrder", value="%s" % rankOrder, type='hidden')) + + form.append(HT.Input(name="TissueProbeSetFreezeId", value="%s" % self.tissue_probeset_freeze_id, type='hidden')) + + #################################### + # generate the info on top of page # + #################################### + + info = self.getTopInfo(myTrait=myTrait, method=self.method, db=self.db, target_db_name=self.target_db_name, returnNumber=self.returnNumber, methodDict=self.CORRELATION_METHODS, totalTraits=traitList, identification=fd.identification ) + + ############## + # Excel file # + ############## + filename= webqtlUtil.genRandStr("Corr_") + xlsUrl = HT.Input(type='button', value = 'Download Table', onClick= "location.href='/tmp/%s.xls'" % filename, Class='button') + # Create a new Excel workbook + workbook = xl.Writer('%s.xls' % (webqtlConfig.TMPDIR+filename)) + headingStyle = workbook.add_format(align = 'center', bold = 1, border = 1, size=13, fg_color = 0x1E, color="white") + + #XZ, 3/18/2010: pay attention to the line number of header in this file. As of today, there are 7 lines. + worksheet = self.createExcelFileWithTitleAndFooter(workbook=workbook, identification=fd.identification, db=self.db, returnNumber=self.returnNumber) + + newrow = 7 + + +##################################################################### + + + #Select All, Deselect All, Invert Selection, Add to Collection + mintmap = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'showIntMap');" % mainfmName) + mintmap_img = HT.Image("/images/multiple_interval_mapping1_final.jpg", name='mintmap', alt="Multiple Interval Mapping", title="Multiple Interval Mapping", style="border:none;") + mintmap.append(mintmap_img) + mcorr = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'compCorr');" % mainfmName) + mcorr_img = HT.Image("/images/compare_correlates2_final.jpg", alt="Compare Correlates", title="Compare Correlates", style="border:none;") + mcorr.append(mcorr_img) + cormatrix = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'corMatrix');" % mainfmName) + cormatrix_img = HT.Image("/images/correlation_matrix1_final.jpg", alt="Correlation Matrix and PCA", title="Correlation Matrix and PCA", style="border:none;") + cormatrix.append(cormatrix_img) + networkGraph = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'networkGraph');" % mainfmName) + networkGraph_img = HT.Image("/images/network_graph1_final.jpg", name='mintmap', alt="Network Graphs", title="Network Graphs", style="border:none;") + networkGraph.append(networkGraph_img) + heatmap = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'heatmap');" % mainfmName) + heatmap_img = HT.Image("/images/heatmap2_final.jpg", name='mintmap', alt="QTL Heat Map and Clustering", title="QTL Heatmap and Clustering", style="border:none;") + heatmap.append(heatmap_img) + partialCorr = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'partialCorrInput');" % mainfmName) + partialCorr_img = HT.Image("/images/partial_correlation_final.jpg", name='partialCorr', alt="Partial Correlation", title="Partial Correlation", style="border:none;") + partialCorr.append(partialCorr_img) + addselect = HT.Href(url="#redirect", onClick="addRmvSelection('%s', document.getElementsByName('%s')[0], 'addToSelection');" % (fd.RISet, mainfmName)) + addselect_img = HT.Image("/images/add_collection1_final.jpg", name="addselect", alt="Add To Collection", title="Add To Collection", style="border:none;") + addselect.append(addselect_img) + selectall = HT.Href(url="#redirect", onClick="checkAll(document.getElementsByName('%s')[0]);" % mainfmName) + selectall_img = HT.Image("/images/select_all2_final.jpg", name="selectall", alt="Select All", title="Select All", style="border:none;") + selectall.append(selectall_img) + selectinvert = HT.Href(url="#redirect", onClick = "checkInvert(document.getElementsByName('%s')[0]);" % mainfmName) + selectinvert_img = HT.Image("/images/invert_selection2_final.jpg", name="selectinvert", alt="Invert Selection", title="Invert Selection", style="border:none;") + selectinvert.append(selectinvert_img) + reset = HT.Href(url="#redirect", onClick="checkNone(document.getElementsByName('%s')[0]); return false;" % mainfmName) + reset_img = HT.Image("/images/select_none2_final.jpg", alt="Select None", title="Select None", style="border:none;") + reset.append(reset_img) + selecttraits = HT.Input(type='button' ,name='selecttraits',value='Select Traits', onClick="checkTraits(this.form);",Class="button") + selectgt = HT.Input(type='text' ,name='selectgt',value='-1.0', size=6,maxlength=10,onChange="checkNumeric(this,1.0,'-1.0','gthan','greater than filed')") + selectlt = HT.Input(type='text' ,name='selectlt',value='1.0', size=6,maxlength=10,onChange="checkNumeric(this,-1.0,'1.0','lthan','less than field')") + selectandor = HT.Select(name='selectandor') + selectandor.append(('AND','and')) + selectandor.append(('OR','or')) + selectandor.selected.append('AND') + + + #External analysis tools + GCATButton = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'GCAT');" % mainfmName) + GCATButton_img = HT.Image("/images/GCAT_logo_final.jpg", name="GCAT", alt="GCAT", title="GCAT", style="border:none") + GCATButton.append(GCATButton_img) + + ODE = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'ODE');" % mainfmName) + ODE_img = HT.Image("/images/ODE_logo_final.jpg", name="ode", alt="ODE", title="ODE", style="border:none") + ODE.append(ODE_img) + + ''' + #XZ, 07/07/2010: I comment out this block of code. + WebGestaltScript = HT.Script(language="Javascript") + WebGestaltScript.append(""" +setTimeout('openWebGestalt()', 2000); +function openWebGestalt(){ +var thisForm = document['WebGestalt']; +makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); +} + """ % (mainfmName, len(traitList))) + ''' + + self.cursor.execute('SELECT GeneChip.GO_tree_value FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and ProbeSetFreeze.Name = "%s"' % self.db.name) + result = self.cursor.fetchone() + + if result: + GO_tree_value = result[0] + + if GO_tree_value: + + WebGestalt = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'GOTree');" % mainfmName) + WebGestalt_img = HT.Image("/images/webgestalt_icon_final.jpg", name="webgestalt", alt="Gene Set Analysis Toolkit", title="Gene Set Analysis Toolkit", style="border:none") + WebGestalt.append(WebGestalt_img) + + hddnWebGestalt = { + 'id_list':'', + 'correlation':'', + 'id_value':'', + 'llid_list':'', + 'id_type':GO_tree_value, + 'idtype':'', + 'species':'', + 'list':'', + 'client':''} + + hddnWebGestalt['ref_type'] = hddnWebGestalt['id_type'] + hddnWebGestalt['cat_type'] = 'GO' + hddnWebGestalt['significancelevel'] = 'Top10' + + if self.species == 'rat': + hddnWebGestalt['org'] = 'Rattus norvegicus' + elif self.species == 'human': + hddnWebGestalt['org'] = 'Homo sapiens' + elif self.species == 'mouse': + hddnWebGestalt['org'] = 'Mus musculus' + else: + hddnWebGestalt['org'] = '' + + for key in hddnWebGestalt.keys(): + form.append(HT.Input(name=key, value=hddnWebGestalt[key], type='hidden')) + + + #Create tables with options, etc + + pageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%", border=0, align="Left") + + containerTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="90%",border=0, align="Left") + + + if not GO_tree_value: + optionsTable = HT.TableLite(cellSpacing=2, cellPadding=0,width="480", height="80", border=0, align="Left") + optionsTable.append(HT.TR(HT.TD(selectall), HT.TD(reset), HT.TD(selectinvert), HT.TD(addselect), HT.TD(GCATButton), HT.TD(ODE), align="left")) + optionsTable.append(HT.TR(HT.TD(" "*1,"Select"), HT.TD("Deselect"), HT.TD(" "*1,"Invert"), HT.TD(" "*3,"Add"), HT.TD("Gene Set"), HT.TD(" "*2,"GCAT"))) + else: + optionsTable = HT.TableLite(cellSpacing=2, cellPadding=0,width="560", height="80", border=0, align="Left") + optionsTable.append(HT.TR(HT.TD(selectall), HT.TD(reset), HT.TD(selectinvert), HT.TD(addselect), HT.TD(GCATButton), HT.TD(ODE), HT.TD(WebGestalt), align="left")) + optionsTable.append(HT.TR(HT.TD(" "*1,"Select"), HT.TD("Deselect"), HT.TD(" "*1,"Invert"), HT.TD(" "*3,"Add"), HT.TD("Gene Set"), HT.TD(" "*2,"GCAT"), HT.TD(" "*3, "ODE"))) + containerTable.append(HT.TR(HT.TD(optionsTable))) + + functionTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="480",height="80", border=0, align="Left") + functionRow = HT.TR(HT.TD(networkGraph, width="16.7%"), HT.TD(cormatrix, width="16.7%"), HT.TD(partialCorr, width="16.7%"), HT.TD(mcorr, width="16.7%"), HT.TD(mintmap, width="16.7%"), HT.TD(heatmap), align="left") + labelRow = HT.TR(HT.TD(" "*1,HT.Text("Graph")), HT.TD(" "*1,HT.Text("Matrix")), HT.TD(" "*1,HT.Text("Partial")), HT.TD(HT.Text("Compare")), HT.TD(HT.Text("QTL Map")), HT.TD(HT.Text(text="Heat Map"))) + functionTable.append(functionRow, labelRow) + containerTable.append(HT.TR(HT.TD(functionTable), HT.BR())) + + #more_options = HT.Image("/images/more_options1_final.jpg", name='more_options', alt="Expand Options", title="Expand Options", style="border:none;", Class="toggleShowHide") + + #containerTable.append(HT.TR(HT.TD(more_options, HT.BR(), HT.BR()))) + + moreOptions = HT.Input(type='button',name='options',value='More Options', onClick="",Class="toggle") + fewerOptions = HT.Input(type='button',name='options',value='Fewer Options', onClick="",Class="toggle") + + """ + if (fd.formdata.getvalue('showHideOptions') == 'less'): + containerTable.append(HT.TR(HT.TD(" "), height="10"), HT.TR(HT.TD(HT.Div(fewerOptions, Class="toggleShowHide")))) + containerTable.append(HT.TR(HT.TD(" "))) + else: + containerTable.append(HT.TR(HT.TD(" "), height="10"), HT.TR(HT.TD(HT.Div(moreOptions, Class="toggleShowHide")))) + containerTable.append(HT.TR(HT.TD(" "))) + """ + + containerTable.append(HT.TR(HT.TD(HT.Span(selecttraits,' with r > ',selectgt, ' ',selectandor, ' r < ',selectlt,Class="bd1 cbddf fs11")), style="display:none;", Class="extra_options")) + + chrMenu = HT.Input(type='hidden',name='chromosomes',value='all') + + corrHeading = HT.Paragraph('Correlation Table', Class="title") + + + tblobj = {} + + if self.db.type=="Geno": + containerTable.append(HT.TR(HT.TD(xlsUrl, height=60))) + + pageTable.append(HT.TR(HT.TD(containerTable))) + + tblobj['header'], worksheet = self.getTableHeaderForGeno( method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + newrow += 1 + + sortby = self.getSortByValue( calculationMethod = self.method ) + + corrScript = HT.Script(language="Javascript") + corrScript.append("var corrArray = new Array();") + + tblobj['body'], worksheet, corrScript = self.getTableBodyForGeno(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript) + + workbook.close() + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + + div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), corrScript, Id="sortable") + + pageTable.append(HT.TR(HT.TD(div))) + + form.append(HT.Input(name='ShowStrains',type='hidden', value =1), + HT.Input(name='ShowLine',type='hidden', value =1), + HT.P(), HT.P(), pageTable) + TD_LR.append(corrHeading, info, form, HT.P()) + + self.dict['body'] = str(TD_LR) + self.dict['js1'] = '' + self.dict['title'] = 'Correlation' + + elif self.db.type=="Publish": + + containerTable.append(HT.TR(HT.TD(xlsUrl, height=40))) + + pageTable.append(HT.TR(HT.TD(containerTable))) + + tblobj['header'], worksheet = self.getTableHeaderForPublish(method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + newrow += 1 + + sortby = self.getSortByValue( calculationMethod = self.method ) + + corrScript = HT.Script(language="Javascript") + corrScript.append("var corrArray = new Array();") + + tblobj['body'], worksheet, corrScript = self.getTableBodyForPublish(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript, species=self.species) + + workbook.close() + + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + # NL, 07/27/2010. genTableObj function has been moved from templatePage.py to webqtlUtil.py; + div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), corrScript, Id="sortable") + + pageTable.append(HT.TR(HT.TD(div))) + + form.append( + HT.Input(name='ShowStrains',type='hidden', value =1), + HT.Input(name='ShowLine',type='hidden', value =1), + HT.P(), pageTable) + TD_LR.append(corrHeading, info, form, HT.P()) + + self.dict['body'] = str(TD_LR) + self.dict['js1'] = '' + self.dict['title'] = 'Correlation' + + + elif self.db.type=="ProbeSet": + tblobj['header'], worksheet = self.getTableHeaderForProbeSet(method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + newrow += 1 + + sortby = self.getSortByValue( calculationMethod = self.method ) + + corrScript = HT.Script(language="Javascript") + corrScript.append("var corrArray = new Array();") + + tblobj['body'], worksheet, corrScript = self.getTableBodyForProbeSet(traitList=traitList, primaryTrait=myTrait, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript, species=self.species) + + workbook.close() + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + + #XZ: here is the table of traits + div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1", hiddenColumns=["Gene ID","Homologene ID"]), corrScript, Id="sortable") + + + #XZ, 01/12/2009: create database menu for 'Add Correlation' + self.cursor.execute(""" + select + ProbeSetFreeze.FullName, ProbeSetFreeze.Id, Tissue.name + from + ProbeSetFreeze, ProbeFreeze, ProbeSetFreeze as ps2, ProbeFreeze as p2, Tissue + where + ps2.Id = %d + and ps2.ProbeFreezeId = p2.Id + and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id + and (ProbeFreeze.InbredSetId = p2.InbredSetId or (ProbeFreeze.InbredSetId in (1, 3) and p2.InbredSetId in (1, 3))) + and p2.ChipId = ProbeFreeze.ChipId + and ps2.Id != ProbeSetFreeze.Id + and ProbeFreeze.TissueId = Tissue.Id + and ProbeSetFreeze.public > %d + order by + ProbeFreeze.TissueId, ProbeSetFreeze.CreateTime desc + """ % (self.db.id, webqtlConfig.PUBLICTHRESH)) + + results = self.cursor.fetchall() + dbCustomizer = HT.Select(results, name = "customizer") + databaseMenuSub = preTissue = "" + for item in results: + TName, TId, TTissue = item + if TTissue != preTissue: + if databaseMenuSub: + dbCustomizer.append(databaseMenuSub) + databaseMenuSub = HT.Optgroup(label = '%s mRNA ------' % TTissue) + preTissue = TTissue + + databaseMenuSub.append(item[:2]) + if databaseMenuSub: + dbCustomizer.append(databaseMenuSub) + + #updated by NL. Delete function generateJavaScript, move js files to dhtml.js, webqtl.js and jqueryFunction.js + #variables: filename, strainIds and vals are required by getquerystring function + strainIds=self.getStrainIds(species=self.species, strains=self.sample_names) + var1 = HT.Input(name="filename", value=filename, type='hidden') + var2 = HT.Input(name="strainIds", value=strainIds, type='hidden') + var3 = HT.Input(name="vals", value=vals, type='hidden') + customizerButton = HT.Input(type="button", Class="button", value="Add Correlation", onClick = "xmlhttpPost('%smain.py?FormID=AJAX_table', 'sortable', (getquerystring(this.form)))" % webqtlConfig.CGIDIR) + + containerTable.append(HT.TR(HT.TD(HT.Span(var1,var2,var3,customizerButton, "with", dbCustomizer, Class="bd1 cbddf fs11"), HT.BR(), HT.BR()), style="display:none;", Class="extra_options")) + + containerTable.append(HT.TR(HT.TD(xlsUrl, HT.BR(), HT.BR()))) + + pageTable.append(HT.TR(HT.TD(containerTable))) + + pageTable.append(HT.TR(HT.TD(div))) + + if self.species == 'human': + heatmap = "" + + form.append(HT.Input(name='ShowStrains',type='hidden', value =1), + HT.Input(name='ShowLine',type='hidden', value =1), + info, HT.BR(), pageTable, HT.BR()) + + TD_LR.append(corrHeading, form, HT.P()) + + + self.dict['body'] = str(TD_LR) + self.dict['title'] = 'Correlation' + # updated by NL. Delete function generateJavaScript, move js files to dhtml.js, webqtl.js and jqueryFunction.js + self.dict['js1'] = '' + self.dict['js2'] = 'onLoad="pageOffset()"' + self.dict['layer'] = self.generateWarningLayer() + else: + self.dict['body'] = "" + + def process_samples(self, start_vars, sample_names, excluded_samples): + for sample in sample_names: + if sample not in excluded_samples: + value = start_vars['value:' + sample] + variance = start_vars['variance:' + sample] + if variance.strip().lower() == 'x': + variance = 0 + else: + variance = float(variance) + if value.strip().lower() != 'x': + self.samples.append(str(sample)) + self.vals.append(float(value)) + self.variances.append(variance) + + def getSortByValue(self, calculationMethod): + + if calculationMethod == "1": + sortby = ("Sample p(r)", "up") + elif calculationMethod == "2": + sortby = ("Sample p(rho)", "up") + elif calculationMethod == "3": #XZ: literature correlation + sortby = ("Lit Corr","down") + elif calculationMethod == "4": #XZ: tissue correlation + sortby = ("Tissue r", "down") + elif calculationMethod == "5": + sortby = ("Tissue rho", "down") + + return sortby + + + + def generateWarningLayer(self): + + layerString = """ + + + + + """ + + return layerString + + + #XZ, 01/07/2009: In HTML code, the variable 'database' corresponds to the column 'Name' in database table. + def getFileName(self, target_db_name): ### dcrowell August 2008 + """Returns the name of the reference database file with which correlations are calculated. + Takes argument cursor which is a cursor object of any instance of a subclass of templatePage + Used by correlationPage""" +ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name + self.cursor.execute(query) + result = self.cursor.fetchone() + Id = result[0] + FullName = result[1] + FullName = FullName.replace(' ','_') + FullName = FullName.replace('/','_') + + FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt' + + return FileName + query = 'SELECT Id, FullName F + + + #XZ, 01/29/2009: I modified this function. + #XZ: Note that the type of StrainIds must be number, not string. + def getStrainIds(self, species=None, strains=[]): + StrainIds = [] + for item in strains: + self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE + Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species)) + Id = self.cursor.fetchone()[0] + StrainIds.append(Id) + + return StrainIds + + + #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid + #XZ, 12/12/2008: if the input geneid is 'None', return 0 + #XZ, 12/12/2008: if the input geneid has no corresponding mouse geneid, return 0 + def translateToMouseGeneID(self, species, geneid): + #mouse_geneid = 0 + + if not geneid: + return 0 + + #self.id, self.name, self.fullname, self.shortname = g.db.execute(""" + # SELECT Id, Name, FullName, ShortName + # FROM %s + # WHERE public > %s AND + # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') + # """ % (query_args)).fetchone() + + if species == 'mouse': + mouse_geneid = geneid + elif species == 'rat': + mouse_geneid = g.db.execute( + """SELECT mouse FROM GeneIDXRef WHERE rat='%d'""", int(geneid)).fetchone().mouse + #if record: + # mouse_geneid = record[0] + elif species == 'human': + mouse_geneid = g.db.execute( + """SELECT mouse FROM GeneIDXRef WHERE human='%d'""", int(geneid)).fetchone().mouse + #if record: + # mouse_geneid = record[0] + print("mouse_geneid:", mouse_geneid) + return mouse_geneid + + + #XZ, 12/16/2008: the input geneid is of mouse type + def checkForLitInfo(self,geneId): + q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId + self.cursor.execute(q) + try: + x = self.cursor.fetchone() + if x: return True + else: raise + except: return False + + + #XZ, 12/16/2008: the input geneid is of mouse type + def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): + q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) + self.cursor.execute(q) + try: + x = self.cursor.fetchone() + if x: return True + else: raise + except: return False + + + def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId): + + StrainIds = [] + for item in strains: + self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species)) + Id = self.cursor.fetchone()[0] + StrainIds.append('%d' % Id) + + # break it into smaller chunks so we don't overload the MySql server + nnn = len(StrainIds) / 25 + if len(StrainIds) % 25: + nnn += 1 + oridata = [] + + #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId + tempTable = None + if GeneId and db.type == "ProbeSet": + if method == "3": + tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber) + + if method == "4" or method == "5": + tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber) + + for step in range(nnn): + temp = [] + StrainIdstep = StrainIds[step*25:min(len(StrainIds), (step+1)*25)] + for item in StrainIdstep: temp.append('T%s.value' % item) + + if db.type == "Publish": + query = "SELECT PublishXRef.Id, " + dataStartPos = 1 + query += string.join(temp,', ') + query += ' FROM (PublishXRef, PublishFreeze)' + #XZ, 03/04/2009: Xiaodong changed Data to PublishData + for item in StrainIdstep: + query += 'left join PublishData as T%s on T%s.Id = PublishXRef.DataId and T%s.StrainId=%s\n' %(item,item,item,item) + query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, ) + #XZ, 09/20/2008: extract literature correlation value together with gene expression values. + #XZ, 09/20/2008: notice the difference between the code in next block. + elif tempTable: + # we can get a little performance out of selecting our LitCorr here + # but also we need to do this because we are unconcerned with probes that have no geneId associated with them + # as we would not have litCorr data. + + if method == "3": + query = "SELECT %s.Name, %s.value," % (db.type,tempTable) + dataStartPos = 2 + if method == "4" or method == "5": + query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) + dataStartPos = 3 + + query += string.join(temp,', ') + query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) + if method == "3": + query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) + if method == "4" or method == "5": + query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) + #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + for item in StrainIdstep: + query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) + + if method == "3": + query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + if method == "4" or method == "5": + query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + else: + query = "SELECT %s.Name," % db.type + dataStartPos = 1 + query += string.join(temp,', ') + query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) + #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + for item in StrainIdstep: + query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) + query += "WHERE %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + + self.cursor.execute(query) + results = self.cursor.fetchall() + oridata.append(results) + + datasize = len(oridata[0]) + traits = [] + # put all of the separate data together into a huge list of lists + for j in range(datasize): + traitdata = list(oridata[0][j]) + for i in range(1,nnn): + traitdata += list(oridata[i][j][dataStartPos:]) + + trait = Trait(traitdata[0], traitdata[dataStartPos:]) + + if method == METHOD_LIT: + trait.lit_corr = traitdata[1] + + if method in TISSUE_METHODS: + trait.tissue_corr = traitdata[1] + trait.p_tissue = traitdata[2] + + traits.append(trait) + + if tempTable: + self.cursor.execute( 'DROP TEMPORARY TABLE %s' % tempTable ) + + return traits + + + + + # XZ, 09/20/2008: This function creates TEMPORARY TABLE tmpTableName_2 and return its name. + # XZ, 09/20/2008: It stores top literature correlation values associated with the input geneId. + # XZ, 09/20/2008: Attention: In each row, the input geneId is always in column GeneId1. + #XZ, 12/16/2008: the input geneid can be of mouse, rat or human type + def getTempLiteratureTable(self, species, input_species_geneid, returnNumber): + # according to mysql the TEMPORARY TABLE name should not have to be unique because + # it is only available to the current connection. This program will be invoked via command line, but if it + # were to be invoked over mod_python this could cuase problems. mod_python will keep the connection alive + # in its executing threads ( i think) so there is a potential for the table not being dropped between users. + #XZ, 01/29/2009: To prevent the potential risk, I generate random table names and drop the tables after use them. + + + # the 'input_species_geneid' could be rat or human geneid, need to translate it to mouse geneid + translated_mouse_geneid = self.translateToMouseGeneID (species, input_species_geneid) + + tmpTableName_1 = webqtlUtil.genRandStr(prefix="LITERATURE") + + q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_1 + q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName_1, translated_mouse_geneid) + q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName_1, translated_mouse_geneid,translated_mouse_geneid) + for x in [q1,q2,q3]: self.cursor.execute(x) + + #XZ, 09/23/2008: Just use the top records insteard of using all records + tmpTableName_2 = webqtlUtil.genRandStr(prefix="TOPLITERATURE") + + q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_2 + self.cursor.execute(q1) + q2 = 'SELECT GeneId1, GeneId2, value FROM %s ORDER BY value DESC' % tmpTableName_1 + self.cursor.execute(q2) + result = self.cursor.fetchall() + + counter = 0 #this is to count how many records being inserted into table + for one_row in result: + mouse_geneid1, mouse_geneid2, lit_corr_alue = one_row + + #mouse_geneid1 has been tested before, now should test if mouse_geneid2 has corresponding geneid in other species + translated_species_geneid = 0 + if species == 'mouse': + translated_species_geneid = mouse_geneid2 + elif species == 'rat': + self.cursor.execute( "SELECT rat FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) ) + record = self.cursor.fetchone() + if record: + translated_species_geneid = record[0] + elif species == 'human': + self.cursor.execute( "SELECT human FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) ) + record = self.cursor.fetchone() + if record: + translated_species_geneid = record[0] + + if translated_species_geneid: + self.cursor.execute( 'INSERT INTO %s (GeneId1, GeneId2, value) VALUES (%d,%d,%f)' % (tmpTableName_2, int(input_species_geneid),int(translated_species_geneid), float(lit_corr_alue)) ) + counter = counter + 1 + + #pay attention to the number + if (counter > 2*returnNumber): + break + + self.cursor.execute('DROP TEMPORARY TABLE %s' % tmpTableName_1) + + return tmpTableName_2 + + + + #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2 + #XZ, 09/24/2008: Note that the correlation value can be negative. + def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0): + + def cmpTissCorrAbsoluteValue(A, B): + try: + if abs(A[1]) < abs(B[1]): return 1 + elif abs(A[1]) == abs(B[1]): + return 0 + else: return -1 + except: + return 0 + + symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method) + + symbolCorrList = symbolCorrDict.items() + + symbolCorrList.sort(cmpTissCorrAbsoluteValue) + symbolCorrList = symbolCorrList[0 : 2*returnNumber] + + tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE") + + q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName + self.cursor.execute(q1) + + for one_pair in symbolCorrList: + one_symbol = one_pair[0] + one_corr = one_pair[1] + one_p_value = symbolPvalueDict[one_symbol] + + self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) ) + + return tmpTableName + + + #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it. + def fetchLitCorrelations(self, species, GeneId, db, returnNumber): ### Used to generate Lit Correlations when calculations are done from text file. dcrowell August 2008 + """Uses getTempLiteratureTable to generate table of literatire correlations. This function then gathers that data and + pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance. + Returns a dictionary of 'TraitID':'LitCorr' for the requested correlation""" + + tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber) + + query = "SELECT %s.Name, %s.value" % (db.type,tempTable) + query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) + query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) + query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable, db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + + self.cursor.execute(query) + results = self.cursor.fetchall() + + litCorrDict = {} + + for entry in results: + traitName,litcorr = entry + litCorrDict[traitName] = litcorr + + self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable) + + return litCorrDict + + + + #XZ, 01/09/2009: Xiaodong created this function. + def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0): + """Uses getTempTissueCorrTable to generate table of tissue correlations. This function then gathers that data and + pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance. + Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation""" + + + tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber) + + query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" % (tempTable, tempTable) + query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)' + query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) + query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable) + + self.cursor.execute(query) + results = self.cursor.fetchall() + + tissueCorrDict = {} + + for entry in results: + traitName, tissueCorr, tissuePValue = entry + tissueCorrDict[traitName] = (tissueCorr, tissuePValue) + + self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable) + + return tissueCorrDict + + + + #XZ, 01/13/2008 + def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None): + + tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE") + + q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName + q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName, input_trait_mouse_geneid) + q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName, input_trait_mouse_geneid, input_trait_mouse_geneid) + + for x in [q1,q2,q3]: + self.cursor.execute(x) + + for thisTrait in traitList: + try: + if thisTrait.geneid: + thisTrait.mouse_geneid = self.translateToMouseGeneID(species, thisTrait.geneid) + else: + thisTrait.mouse_geneid = 0 + except: + thisTrait.mouse_geneid = 0 + + if thisTrait.mouse_geneid and str(thisTrait.mouse_geneid).find(";") == -1: + try: + self.cursor.execute("SELECT value FROM %s WHERE GeneId2 = %s" % (tmpTableName, thisTrait.mouse_geneid)) + result = self.cursor.fetchone() + if result: + thisTrait.LCorr = result[0] + else: + thisTrait.LCorr = None + except: + thisTrait.LCorr = None + else: + thisTrait.LCorr = None + + self.cursor.execute("DROP TEMPORARY TABLE %s" % tmpTableName) + + return traitList + + def get_traits(self, vals): + + #Todo: Redo cached stuff using memcached + if False: + _log.info("Using the fast method because the file exists") + lit_corrs = {} + tissue_corrs = {} + use_lit = False + if self.method == METHOD_LIT: + lit_corrs = self.fetchLitCorrelations(species=self.species, GeneId=self.gene_id, db=self.db, returnNumber=self.returnNumber) + use_lit = True + + use_tissue_corr = False + if self.method in TISSUE_METHODS: + tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber) + use_tissue_corr = True + + DatabaseFileName = self.getFileName( target_db_name=self.target_db_name ) + datasetFile = open(webqtlConfig.TEXTDIR+DatabaseFileName,'r') + + #XZ, 01/08/2009: read the first line + line = datasetFile.readline() + cached_sample_names = webqtlUtil.readLineCSV(line)[1:] + + #XZ, 01/08/2009: This step is critical. It is necessary for this new method. + #XZ: The original function fetchAllDatabaseData uses all strains stored in variable _strains to + #XZ: retrieve the values of each strain from database in real time. + #XZ: The new method uses all strains stored in variable dataset_strains to create a new variable + #XZ: _newvals. _newvals has the same length as dataset_strains. The items in _newvals is in + #XZ: the same order of items in dataset_strains. The value of each item in _newvals is either + #XZ: the value of correspinding strain in _vals or 'None'. + new_vals = [] + for name in cached_sample_names: + if name in self.sample_names: + new_vals.append(float(vals[self.sample_names.index(name)])) + else: + new_vals.append('None') + + nnCorr = len(new_vals) + + #XZ, 01/14/2009: If literature corr or tissue corr is selected, + #XZ: there is no need to use parallel computing. + + traits = [] + data_start = 1 + for line in datasetFile: + raw_trait = webqtlUtil.readLineCSV(line) + trait = Trait.from_csv(raw_trait, data_start) + trait.lit_corr = lit_corrs.get(trait.name) + trait.tissue_corr, trait.p_tissue = tissue_corrs.get(trait.name, (None, None)) + traits.append(trait) + + return traits, new_vals + + else: + #_log.info("Using the slow method for correlation") + # + #_log.info("Fetching from database") + traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) + #_log.info("Done fetching from database") + totalTraits = len(traits) #XZ, 09/18/2008: total trait number + + return traits + + + def do_parallel_correlation(self): + _log.info("Invoking parallel computing") + input_line_list = datasetFile.readlines() + _log.info("Read lines from the file") + all_line_number = len(input_line_list) + + step = 1000 + job_number = math.ceil( float(all_line_number)/step ) + + job_input_lists = [] + + _log.info("Configuring jobs") + + for job_index in range( int(job_number) ): + starti = job_index*step + endi = min((job_index+1)*step, all_line_number) + + one_job_input_list = [] + + for i in range( starti, endi ): + one_job_input_list.append( input_line_list[i] ) + + job_input_lists.append( one_job_input_list ) + + _log.info("Creating pp servers") + + ppservers = () + # Creates jobserver with automatically detected number of workers + job_server = pp.Server(ppservers=ppservers) + + _log.info("Done creating servers") + + jobs = [] + results = [] + + _log.info("Starting parallel computation, submitting jobs") + for one_job_input_list in job_input_lists: #pay attention to modules from outside + jobs.append( job_server.submit(func=compute_corr, args=(nnCorr, _newvals, one_job_input_list, self.method), depfuncs=(), modules=("utility.webqtlUtil",)) ) + _log.info("Done submitting jobs") + + for one_job in jobs: + one_result = one_job() + results.append( one_result ) + + _log.info("Acquiring results") + + for one_result in results: + for one_traitinfo in one_result: + allcorrelations.append( one_traitinfo ) + + _log.info("Appending the results") + + datasetFile.close() + totalTraits = len(allcorrelations) + _log.info("Done correlating using the fast method") + + + def correlate(self): + + correlations = [] + + #XZ: Use the fast method only for probeset dataset, and this dataset must have been created. + #XZ: Otherwise, use original method + #_log.info("Entering correlation") + + #db_filename = self.getFileName(target_db_name=self.target_db_name) + # + #cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR) + + # If the cache file exists, do a cached correlation for probeset data + if self.db.type == "ProbeSet": +# if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available: +# traits = do_parallel_correlation() +# +# else: + + traits = self.get_traits(self.vals) + + for trait in traits: + trait.calculate_correlation(vals, self.method) + + self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later + + #XZ, 3/31/2010: Theoretically, we should create one function 'comTissueCorr' + #to compare each trait by their tissue corr p values. + #But because the tissue corr p values are generated by permutation test, + #the top ones always have p value 0. So comparing p values actually does nothing. + #In addition, for the tissue data in our database, the N is always the same. + #So it's safe to compare with tissue corr statistic value. + #That's the same as literature corr. + #if self.method in [METHOD_LIT, METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] and self.gene_id: + # traits.sort(webqtlUtil.cmpLitCorr) + #else: + #if self.method in TISSUE_METHODS: + # sort(traits, key=lambda A: math.fabs(A.tissue_corr)) + #elif self.method == METHOD_LIT: + # traits.sort(traits, key=lambda A: math.fabs(A.lit_corr)) + #else: + traits = sortTraitCorrelations(traits, self.method) + + # Strip to the top N correlations + traits = traits[:min(self.returnNumber, len(traits))] + + addLiteratureCorr = False + addTissueCorr = False + + trait_list = [] + for trait in traits: + db_trait = webqtlTrait(db=self.db, name=trait.name, cursor=self.cursor) + db_trait.retrieveInfo( QTL='Yes' ) + + db_trait.Name = trait.name + db_trait.corr = trait.correlation + db_trait.nOverlap = trait.overlap + db_trait.corrPValue = trait.p_value + + # NL, 07/19/2010 + # js function changed, add a new parameter rankOrder for js function 'showTissueCorrPlot' + db_trait.RANK_ORDER = self.RANK_ORDERS[self.method] + + #XZ, 26/09/2008: Method is 4 or 5. Have fetched tissue corr, but no literature correlation yet. + if self.method in TISSUE_METHODS: + db_trait.tissueCorr = trait.tissue_corr + db_trait.tissuePValue = trait.p_tissue + addTissueCorr = True + + + #XZ, 26/09/2008: Method is 3, Have fetched literature corr, but no tissue corr yet. + elif self.method == METHOD_LIT: + db_trait.LCorr = trait.lit_corr + db_trait.mouse_geneid = self.translateToMouseGeneID(self.species, db_trait.geneid) + addLiteratureCorr = True + + #XZ, 26/09/2008: Method is 1 or 2. Have NOT fetched literature corr and tissue corr yet. + # Phenotype data will not have geneid, and neither will some probes + # we need to handle this because we will get an attribute error + else: + if self.input_trait_mouse_gene_id and self.db.type=="ProbeSet": + addLiteratureCorr = True + if self.trait_symbol and self.db.type=="ProbeSet": + addTissueCorr = True + + trait_list.append(db_trait) + + if addLiteratureCorr: + trait_list = self.getLiteratureCorrelationByList(self.input_trait_mouse_gene_id, + self.species, trait_list) + if addTissueCorr: + trait_list = self.getTissueCorrelationByList( + primaryTraitSymbol = self.trait_symbol, + traitList = trait_list, + TissueProbeSetFreezeId = TISSUE_MOUSE_DB, + method=self.method) + + return trait_list + + + def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None): + + symbolCorrDict = {} + symbolPvalueDict = {} + + primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) + primaryTraitValue = primaryTraitSymbolValueDict.values()[0] + + SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) + + if method in ["2","5"]: + symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman') + else: + symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict) + + + return (symbolCorrDict, symbolPvalueDict) + + + + #XZ, 10/13/2010 + def getTissueCorrelationByList(self, primaryTraitSymbol=None, traitList=None, TissueProbeSetFreezeId=None, method=None): + + primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) + + if primaryTraitSymbol.lower() in primaryTraitSymbolValueDict: + primaryTraitValue = primaryTraitSymbolValueDict[primaryTraitSymbol.lower()] + + geneSymbolList = [] + + for thisTrait in traitList: + if hasattr(thisTrait, 'symbol'): + geneSymbolList.append(thisTrait.symbol) + + SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=geneSymbolList, TissueProbeSetFreezeId=TISSUE_MOUSE_DB) + + for thisTrait in traitList: + if hasattr(thisTrait, 'symbol') and thisTrait.symbol and thisTrait.symbol.lower() in SymbolValueDict: + oneTraitValue = SymbolValueDict[thisTrait.symbol.lower()] + if method in ["2","5"]: + result = correlationFunction.calZeroOrderCorrForTiss( primaryTraitValue, oneTraitValue, method='spearman' ) + else: + result = correlationFunction.calZeroOrderCorrForTiss( primaryTraitValue, oneTraitValue) + thisTrait.tissueCorr = result[0] + thisTrait.tissuePValue = result[2] + else: + thisTrait.tissueCorr = None + thisTrait.tissuePValue = None + else: + for thisTrait in traitList: + thisTrait.tissueCorr = None + thisTrait.tissuePValue = None + + return traitList + + + def getTopInfo(self, myTrait=None, method=None, db=None, target_db_name=None, returnNumber=None, methodDict=None, totalTraits=None, identification=None ): + + if myTrait: + if method in ["1","2"]: #genetic correlation + info = HT.Paragraph("Values of Record %s in the " % myTrait.getGivenName(), HT.Href(text=myTrait.db.fullname,url=webqtlConfig.INFOPAGEHREF % myTrait.db.name,target="_blank", Class="fwn"), + " database were compared to all %d records in the " % self.record_count, HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank", Class="fwn"), + ' database. The top %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), + ' You can resort this list using the small arrowheads in the top row.') + else: + #myTrait.retrieveInfo()#need to know geneid and symbol + if method == "3":#literature correlation + searchDBName = "Literature Correlation" + searchDBLink = "/correlationAnnotation.html#literatureCorr" + else: #tissue correlation + searchDBName = "Tissue Correlation" + searchDBLink = "/correlationAnnotation.html#tissueCorr" + info = HT.Paragraph("Your input record %s in the " % myTrait.getGivenName(), HT.Href(text=myTrait.db.fullname,url=webqtlConfig.INFOPAGEHREF % myTrait.db.name,target="_blank", Class="fwn"), + " database corresponds to ", + HT.Href(text='gene Id %s, and gene symbol %s' % (myTrait.geneid, myTrait.symbol), target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % myTrait.geneid, Class="fs12 fwn"), + '. GN ranked all genes in the ', HT.Href(text=searchDBName,url=searchDBLink,target="_blank", Class="fwn"),' database by the %s.' % methodDict[method], + ' The top %d probes or probesets in the ' % returnNumber, HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank", Class="fwn"), + ' database corresponding to the top genes ranked by the %s are displayed.' %( methodDict[method]), + ' You can resort this list using the small arrowheads in the top row.' ) + + elif identification: + info = HT.Paragraph('Values of %s were compared to all %d traits in ' % (identification, self.record_count), + HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank",Class="fwn"), + ' database. The TOP %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), + ' You can resort this list using the small arrowheads in the top row.') + + else: + info = HT.Paragraph('Trait values were compared to all values in ', + HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank",Class="fwn"), + ' database. The TOP %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), + ' You can resort this list using the small arrowheads in the top row.') + + if db.type=="Geno": + info.append(HT.BR(),HT.BR(),'Clicking on the Locus will open the genotypes data for that locus. Click on the correlation to see a scatter plot of the trait data.') + elif db.type=="Publish": + info.append(HT.BR(),HT.BR(),'Clicking on the record ID will open the published phenotype data for that publication. Click on the correlation to see a scatter plot of the trait data. ') + elif db.type=="ProbeSet": + info.append(HT.BR(),'Click the correlation values to generate scatter plots. Select the Record ID to open the Trait Data and Analysis form. Select the symbol to open NCBI Entrez.') + else: + pass + + + return info + + + def createExcelFileWithTitleAndFooter(self, workbook=None, identification=None, db=None, returnNumber=None): + + worksheet = workbook.add_worksheet() + + titleStyle = workbook.add_format(align = 'left', bold = 0, size=14, border = 1, border_color="gray") + + ##Write title Info + # Modified by Hongqiang Li + worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle) + worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle) + worksheet.write([2, 0], "Trait : %s" % identification, titleStyle) + worksheet.write([3, 0], "Database : %s" % db.fullname, titleStyle) + worksheet.write([4, 0], "Date : %s" % time.strftime("%B %d, %Y", time.gmtime()), titleStyle) + worksheet.write([5, 0], "Time : %s GMT" % time.strftime("%H:%M ", time.gmtime()), titleStyle) + worksheet.write([6, 0], "Status of data ownership: Possibly unpublished data; please see %s/statusandContact.html for details on sources, ownership, and usage of these data." % webqtlConfig.PORTADDR, titleStyle) + #Write footer info + worksheet.write([9 + returnNumber, 0], "Funding for The GeneNetwork: NIAAA (U01AA13499, U24AA13513), NIDA, NIMH, and NIAAA (P20-DA21131), NCI MMHCC (U01CA105417), and NCRR (U01NR 105417)", titleStyle) + worksheet.write([10 + returnNumber, 0], "PLEASE RETAIN DATA SOURCE INFORMATION WHENEVER POSSIBLE", titleStyle) + + return worksheet + + + def getTableHeaderForGeno(self, method=None, worksheet=None, newrow=None, headingStyle=None): + + tblobj_header = [] + + if method in ["1","3","4"]: + tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb"), sort=0), + THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Record ID', idx=1), + THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Location (Chr and Mb)', idx=2), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=3), + THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=4), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_p_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=5)]] + + for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)', 'Sample r', 'N Cases', 'Sample p(r)']): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + else: + tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb"), sort=0), + THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Record ID', idx=1), + THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Location (Chr and Mb)', idx=2), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=3), + THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=4), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_p_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=5)]] + + for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)', 'Sample rho', 'N Cases', 'Sample p(rho)']): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + + + return tblobj_header, worksheet + + + def getTableBodyForGeno(self, traitList, formName=None, worksheet=None, newrow=None, corrScript=None): + + tblobj_body = [] + + for thisTrait in traitList: + tr = [] + + trId = str(thisTrait) + + corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) + + tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) + + tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"),align="left", Class="fs12 fwn ffl b1 c222"), text=thisTrait.name, val=thisTrait.name.upper())) + + #XZ: trait_location_value is used for sorting + trait_location_repr = '--' + trait_location_value = 1000000 + + if thisTrait.chr and thisTrait.mb: + try: + trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb + except: + if thisTrait.chr.upper() == 'X': + trait_location_value = 20*1000 + thisTrait.mb + else: + trait_location_value = ord(str(thisTrait.chr).upper()[0])*1000 + thisTrait.mb + + trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) ) + + tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value)) + + + repr='%3.3f' % thisTrait.corr + tr.append(TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlot('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"), Class="fs12 fwn ffl b1 c222", nowrap='ON', align='right'),repr,abs(thisTrait.corr))) + + repr = '%d' % thisTrait.nOverlap + tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222",align='right'),repr,thisTrait.nOverlap)) + + repr = webqtlUtil.SciFloat(thisTrait.corrPValue) + tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) + + tblobj_body.append(tr) + + for ncol, item in enumerate([thisTrait.name, trait_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue]): + worksheet.write([newrow, ncol], item) + newrow += 1 + + return tblobj_body, worksheet, corrScript + + + def getTableHeaderForPublish(self, method=None, worksheet=None, newrow=None, headingStyle=None): + + tblobj_header = [] + + if method in ["1","3","4"]: + tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), sort=0), + THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Record ID", idx=1), + THCell(HT.TD('Phenotype', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Phenotype", idx=2), + THCell(HT.TD('Authors', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Authors", idx=3), + THCell(HT.TD('Year', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Year", idx=4), + THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS", idx=5), + THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS Location", idx=6), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=7), + THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=8), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_p_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=9)]] + + for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)", "Sample r", "N Cases", "Sample p(r)"]): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + else: + tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), sort=0), + THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Record ID", idx=1), + THCell(HT.TD('Phenotype', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Phenotype", idx=2), + THCell(HT.TD('Authors', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Authors", idx=3), + THCell(HT.TD('Year', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Year", idx=4), + THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS", idx=5), + THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS Location", idx=6), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=7), + THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=8), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_p_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=9)]] + + for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)", "Sample rho", "N Cases", "Sample p(rho)"]): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + + + return tblobj_header, worksheet + + + def getTableBodyForPublish(self, traitList, formName=None, worksheet=None, newrow=None, corrScript=None, species=''): + + tblobj_body = [] + + for thisTrait in traitList: + tr = [] + + trId = str(thisTrait) + + corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) + + tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) + + tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn"), nowrap="yes",align="center", Class="fs12 fwn b1 c222"),str(thisTrait.name), thisTrait.name)) + + PhenotypeString = thisTrait.post_publication_description + if thisTrait.confidential: + if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=thisTrait.authorized_users): + PhenotypeString = thisTrait.pre_publication_description + + tr.append(TDCell(HT.TD(PhenotypeString, Class="fs12 fwn b1 c222"), PhenotypeString, PhenotypeString.upper())) + + tr.append(TDCell(HT.TD(thisTrait.authors, Class="fs12 fwn b1 c222 fsI"),thisTrait.authors, thisTrait.authors.strip().upper())) + + try: + PubMedLinkText = myear = repr = int(thisTrait.year) + except: + PubMedLinkText = repr = "--" + myear = 0 + if thisTrait.pubmed_id: + PubMedLink = HT.Href(text= repr,url= webqtlConfig.PUBMEDLINK_URL % thisTrait.pubmed_id,target='_blank', Class="fs12 fwn") + else: + PubMedLink = repr + + tr.append(TDCell(HT.TD(PubMedLink, Class="fs12 fwn b1 c222", align='center'), repr, myear)) + + #LRS and its location + LRS_score_repr = '--' + LRS_score_value = 0 + LRS_location_repr = '--' + LRS_location_value = 1000000 + LRS_flag = 1 + + #Max LRS and its Locus location + if thisTrait.lrs and thisTrait.locus: + self.cursor.execute(""" + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '%s' and + Geno.Name = '%s' and + Geno.SpeciesId = Species.Id + """ % (species, thisTrait.locus)) + result = self.cursor.fetchone() + + if result: + if result[0] and result[1]: + LRS_Chr = result[0] + LRS_Mb = result[1] + + #XZ: LRS_location_value is used for sorting + try: + LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) + except: + if LRS_Chr.upper() == 'X': + LRS_location_value = 20*1000 + float(LRS_Mb) + else: + LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + + + LRS_score_repr = '%3.1f' % thisTrait.lrs + LRS_score_value = thisTrait.lrs + LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) ) + LRS_flag = 0 + + #tr.append(TDCell(HT.TD(HT.Href(text=LRS_score_repr,url="javascript:showIntervalMapping('%s', '%s : %s')" % (formName, thisTrait.db.shortname, thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn ffl b1 c222", align='right', nowrap="on"),LRS_score_repr, LRS_score_value)) + tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222", align='right', nowrap="on"), LRS_score_repr, LRS_score_value)) + tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) + + if LRS_flag: + tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222"), LRS_score_repr, LRS_score_value)) + tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) + + repr = '%3.4f' % thisTrait.corr + tr.append(TDCell(HT.TD(HT.Href(text=repr,url="javascript:showCorrPlot('%s', '%s')" % (formName,thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn b1 c222", align='right',nowrap="on"), repr, abs(thisTrait.corr))) + + repr = '%d' % thisTrait.nOverlap + tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.nOverlap)) + + repr = webqtlUtil.SciFloat(thisTrait.corrPValue) + tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) + + tblobj_body.append(tr) + + for ncol, item in enumerate([thisTrait.name, PhenotypeString, thisTrait.authors, thisTrait.year, thisTrait.pubmed_id, LRS_score_repr, LRS_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue]): + worksheet.write([newrow, ncol], item) + newrow += 1 + + return tblobj_body, worksheet, corrScript + + + def getTableHeaderForProbeSet(self, method=None, worksheet=None, newrow=None, headingStyle=None): + + tblobj_header = [] + + if method in ["1","3","4"]: + tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), + THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Record ID", idx=1), + THCell(HT.TD('Gene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Gene ID", idx=2), + THCell(HT.TD('Homologene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Homologene ID", idx=3), + THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Symbol", idx=4), + THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Description", idx=5), + THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Location (Chr: Mb)", idx=6), + THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Mean Expr", idx=7), + THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS", idx=8), + THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS Location (Chr: Mb)", idx=9), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=10), + THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=11), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_p_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=12), + THCell(HT.TD(HT.Href( + text = HT.Span('Lit',HT.BR(), 'Corr', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#literatureCorr"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Lit Corr", idx=13), + #XZ, 09/22/2008: tissue correlation + THCell(HT.TD(HT.Href( + text = HT.Span('Tissue',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue r", idx=14), + THCell(HT.TD(HT.Href( + text = HT.Span('Tissue',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_p_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue p(r)", idx=15)]] + + for ncol, item in enumerate(['Record', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr: Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)', 'Sample r', 'N Cases', 'Sample p(r)', 'Lit Corr', 'Tissue r', 'Tissue p(r)']): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + else: + tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), + THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Record ID", idx=1), + THCell(HT.TD('Gene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Gene ID", idx=2), + THCell(HT.TD('Homologene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Homologene ID", idx=3), + THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Symbol", idx=4), + THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Description", idx=5), + THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Location (Chr: Mb)", idx=6), + THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Mean Expr", idx=7), + THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS", idx=8), + THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS Location (Chr: Mb)", idx=9), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=10), + THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=11), + THCell(HT.TD(HT.Href( + text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#genetic_p_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=12), + THCell(HT.TD(HT.Href( + text = HT.Span('Lit',HT.BR(), 'Corr', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#literatureCorr"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Lit Corr", idx=13), + #XZ, 09/22/2008: tissue correlation + THCell(HT.TD(HT.Href( + text = HT.Span('Tissue',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue rho", idx=14), + THCell(HT.TD(HT.Href( + text = HT.Span('Tissue',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_p_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue p(rho)", idx=15)]] + + for ncol, item in enumerate(['Record ID', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr: Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)', 'Sample rho', 'N Cases', 'Sample p(rho)', 'Lit Corr', 'Tissue rho', 'Tissue p(rho)']): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + + return tblobj_header, worksheet + + + def getTableBodyForProbeSet(self, traitList=[], primaryTrait=None, formName=None, worksheet=None, newrow=None, corrScript=None, species=''): + + tblobj_body = [] + + for thisTrait in traitList: + + if thisTrait.symbol: + pass + else: + thisTrait.symbol = "--" + + if thisTrait.geneid: + symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % thisTrait.geneid, Class="fs12 fwn") + else: + symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&DB=gene&term=%s" % thisTrait.symbol, Class="fs12 fwn") + + tr = [] + + trId = str(thisTrait) + + corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) + + #XZ, 12/08/2008: checkbox + tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) + + #XZ, 12/08/2008: probeset name + tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName,thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn b1 c222"), thisTrait.name, thisTrait.name.upper())) + + #XZ, 12/08/2008: gene id + if thisTrait.geneid: + tr.append(TDCell(None, thisTrait.geneid, val=999)) + else: + tr.append(TDCell(None, thisTrait.geneid, val=999)) + + #XZ, 12/08/2008: homologene id + if thisTrait.homologeneid: + tr.append(TDCell("", thisTrait.homologeneid, val=999)) + else: + tr.append(TDCell("", thisTrait.homologeneid, val=999)) + + #XZ, 12/08/2008: gene symbol + tr.append(TDCell(HT.TD(symbolurl, Class="fs12 fwn b1 c222 fsI"),thisTrait.symbol, thisTrait.symbol.upper())) + + #XZ, 12/08/2008: description + #XZ, 06/05/2009: Rob asked to add probe target description + description_string = str(thisTrait.description).strip() + target_string = str(thisTrait.probe_target_description).strip() + + description_display = '' + + if len(description_string) > 1 and description_string != 'None': + description_display = description_string + else: + description_display = thisTrait.symbol + + if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + description_display = description_display + '; ' + target_string.strip() + + tr.append(TDCell(HT.TD(description_display, Class="fs12 fwn b1 c222"), description_display, description_display)) + + #XZ: trait_location_value is used for sorting + trait_location_repr = '--' + trait_location_value = 1000000 + + if thisTrait.chr and thisTrait.mb: + try: + trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb + except: + if thisTrait.chr.upper() == 'X': + trait_location_value = 20*1000 + thisTrait.mb + else: + trait_location_value = ord(str(thisTrait.chr).upper()[0])*1000 + thisTrait.mb + + trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) ) + + tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value)) + + """ + #XZ, 12/08/2008: chromosome number + #XZ, 12/10/2008: use Mbvalue to sort chromosome + tr.append(TDCell( HT.TD(thisTrait.chr, Class="fs12 fwn b1 c222", align='right'), thisTrait.chr, Mbvalue) ) + + #XZ, 12/08/2008: Rob wants 6 digit precision, and we have to deal with that the mb could be None + if not thisTrait.mb: + tr.append(TDCell(HT.TD(thisTrait.mb, Class="fs12 fwn b1 c222",align='right'), thisTrait.mb, Mbvalue)) + else: + tr.append(TDCell(HT.TD('%.6f' % thisTrait.mb, Class="fs12 fwn b1 c222", align='right'), thisTrait.mb, Mbvalue)) + """ + + + + #XZ, 01/12/08: This SQL query is much faster. + self.cursor.execute(""" + select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = %d and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSet.Name = '%s' + """ % (thisTrait.db.id, thisTrait.name)) + result = self.cursor.fetchone() + if result: + if result[0]: + mean = result[0] + else: + mean=0 + else: + mean = 0 + + #XZ, 06/05/2009: It is neccessary to turn on nowrap + repr = "%2.3f" % mean + tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right', nowrap='ON'),repr, mean)) + + #LRS and its location + LRS_score_repr = '--' + LRS_score_value = 0 + LRS_location_repr = '--' + LRS_location_value = 1000000 + LRS_flag = 1 + + #Max LRS and its Locus location + if thisTrait.lrs and thisTrait.locus: + self.cursor.execute(""" + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '%s' and + Geno.Name = '%s' and + Geno.SpeciesId = Species.Id + """ % (species, thisTrait.locus)) + result = self.cursor.fetchone() + + if result: + if result[0] and result[1]: + LRS_Chr = result[0] + LRS_Mb = result[1] + + #XZ: LRS_location_value is used for sorting + try: + LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) + except: + if LRS_Chr.upper() == 'X': + LRS_location_value = 20*1000 + float(LRS_Mb) + else: + LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + + + LRS_score_repr = '%3.1f' % thisTrait.lrs + LRS_score_value = thisTrait.lrs + LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) ) + LRS_flag = 0 + + #tr.append(TDCell(HT.TD(HT.Href(text=LRS_score_repr,url="javascript:showIntervalMapping('%s', '%s : %s')" % (formName, thisTrait.db.shortname, thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn ffl b1 c222", align='right', nowrap="on"),LRS_score_repr, LRS_score_value)) + tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222", align='right', nowrap="on"), LRS_score_repr, LRS_score_value)) + tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), LRS_location_repr, LRS_location_value)) + + if LRS_flag: + tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222"), LRS_score_repr, LRS_score_value)) + tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) + + + #XZ, 12/08/2008: generic correlation + repr='%3.3f' % thisTrait.corr + tr.append(TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlot('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"), Class="fs12 fwn ffl b1 c222", align='right'),repr,abs(thisTrait.corr))) + + #XZ, 12/08/2008: number of overlaped cases + repr = '%d' % thisTrait.nOverlap + tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.nOverlap)) + + #XZ, 12/08/2008: p value of genetic correlation + repr = webqtlUtil.SciFloat(thisTrait.corrPValue) + tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) + + #XZ, 12/08/2008: literature correlation + LCorr = 0.0 + LCorrStr = "--" + if hasattr(thisTrait, 'LCorr') and thisTrait.LCorr: + LCorr = thisTrait.LCorr + LCorrStr = "%2.3f" % thisTrait.LCorr + tr.append(TDCell(HT.TD(LCorrStr, Class="fs12 fwn b1 c222", align='right'), LCorrStr, abs(LCorr))) + + #XZ, 09/22/2008: tissue correlation. + TCorr = 0.0 + TCorrStr = "--" + #XZ, 11/20/2008: need to pass two geneids: input_trait_mouse_geneid and thisTrait.mouse_geneid + if hasattr(thisTrait, 'tissueCorr') and thisTrait.tissueCorr: + TCorr = thisTrait.tissueCorr + TCorrStr = "%2.3f" % thisTrait.tissueCorr + # NL, 07/19/2010: add a new parameter rankOrder for js function 'showTissueCorrPlot' + rankOrder = self.RANK_ORDERS[self.method] + TCorrPlotURL = "javascript:showTissueCorrPlot('%s','%s','%s',%d)" %(formName, primaryTrait.symbol, thisTrait.symbol,rankOrder) + tr.append(TDCell(HT.TD(HT.Href(text=TCorrStr, url=TCorrPlotURL, Class="fs12 fwn ff1"), Class="fs12 fwn ff1 b1 c222", align='right'), TCorrStr, abs(TCorr))) + else: + tr.append(TDCell(HT.TD(TCorrStr, Class="fs12 fwn b1 c222", align='right'), TCorrStr, abs(TCorr))) + + #XZ, 12/08/2008: p value of tissue correlation + TPValue = 1.0 + TPValueStr = "--" + if hasattr(thisTrait, 'tissueCorr') and thisTrait.tissuePValue: #XZ, 09/22/2008: thisTrait.tissuePValue can't be used here because it could be 0 + TPValue = thisTrait.tissuePValue + TPValueStr = "%2.3f" % thisTrait.tissuePValue + tr.append(TDCell(HT.TD(TPValueStr, Class="fs12 fwn b1 c222", align='right'), TPValueStr, TPValue)) + + tblobj_body.append(tr) + + for ncol, item in enumerate([thisTrait.name, thisTrait.geneid, thisTrait.homologeneid, thisTrait.symbol, thisTrait.description, trait_location_repr, mean, LRS_score_repr, LRS_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue, LCorr, TCorr, TPValue]): + worksheet.write([newrow, ncol], item) + + newrow += 1 + + return tblobj_body, worksheet, corrScript + diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 374e7c95..7cdc350f 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -19,9 +19,9 @@ from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil from base.trait import GeneralTrait -from base.data_set import create_dataset +from base import data_set from base.templatePage import templatePage -from utility import webqtlUtil +from utility import webqtlUtil, helper_functions from base import webqtlConfig from dbFunction import webqtlDatabaseFunction from base.GeneralObject import GeneralObject @@ -54,10 +54,8 @@ class MarkerRegression(object): #print("start_vars are: ", pf(start_vars)) - self.dataset = create_dataset(start_vars['dataset_name']) - self.this_trait = GeneralTrait(dataset=self.dataset.name, - name=start_vars['trait_id'], - cellid=None) + helper_functions.get_dataset_and_trait(self, start_vars) + self.num_perm = int(start_vars['num_perm']) # Passed in by the form (user might have edited) @@ -67,9 +65,6 @@ class MarkerRegression(object): self.vals = [] self.variances = [] - self.dataset.group.read_genotype_file() - self.genotype = self.dataset.group.genotype - assert start_vars['display_all_lrs'] in ('True', 'False') self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 9bd45905..603c40f5 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -14,9 +14,9 @@ from htmlgen import HTMLgen2 as HT from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList -from utility import webqtlUtil, Plot, Bunch +from utility import webqtlUtil, Plot, Bunch, helper_functions from base.trait import GeneralTrait -from base.data_set import create_dataset +from base import data_set from dbFunction import webqtlDatabaseFunction from base.templatePage import templatePage from basicStatistics import BasicStatisticsFunctions @@ -38,17 +38,19 @@ class ShowTrait(object): print("in ShowTrait, kw are:", kw) self.trait_id = kw['trait_id'] - self.dataset = create_dataset(kw['dataset']) + helper_functions.get_dataset_and_trait(self, kw) - #self.cell_id = None - - - this_trait = GeneralTrait(dataset=self.dataset.name, - name=self.trait_id, - cellid=None) - - - self.dataset.group.read_genotype_file() + #self.dataset = create_dataset(kw['dataset']) + # + ##self.cell_id = None + # + # + #this_trait = GeneralTrait(dataset=self.dataset.name, + # name=self.trait_id, + # cellid=None) + # + # + #self.dataset.group.read_genotype_file() if not self.dataset.group.genotype: self.read_data(include_f1=True) @@ -101,23 +103,22 @@ class ShowTrait(object): #hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor, # groupName=fd.group) - self.dispTraitInformation(kw, "", hddn, this_trait) #Display trait information + function buttons + self.dispTraitInformation(kw, "", hddn, self.this_trait) #Display trait information + function buttons #if this_trait == None: # this_trait = webqtlTrait(data=kw['allTraitData'], dataset=None) - self.build_correlation_tools(this_trait) + self.build_correlation_tools(self.this_trait) - self.make_sample_lists(this_trait) + self.make_sample_lists(self.this_trait) if self.dataset.group.allsamples: hddn['allsamples'] = string.join(self.dataset.group.allsamples, ' ') hddn['trait_id'] = self.trait_id - hddn['dataset_name'] = self.dataset.name + hddn['dataset'] = self.dataset.name # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self - self.this_trait = this_trait self.hddn = hddn self.sample_group_types = OrderedDict() diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index c9659a83..472548f0 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -19,7 +19,7 @@ from wqflask import search_results from wqflask.show_trait import show_trait from wqflask.show_trait import export_trait_data from wqflask.marker_regression import marker_regression -from wqflask.correlation import CorrelationPage +from wqflask.correlation import show_corr_results from wqflask.dataSharing import SharingInfo, SharingInfoPage @@ -161,8 +161,8 @@ def marker_regression_page(): @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): print("In corr_compute, request.args is:", pf(request.form)) - fd = webqtlFormData.webqtlFormData(request.form) - template_vars = CorrelationPage.CorrelationPage(fd) + #fd = webqtlFormData.webqtlFormData(request.form) + template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) @app.route("/int_mapping", methods=('POST',)) -- cgit 1.4.1 From 01283a27bf9cc78653059236fa55d6063558ab21 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 22 Jan 2013 17:24:26 -0600 Subject: Pushed through a few errors in getting the correlation page running --- wqflask/utility/helper_functions.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 25 ++++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'wqflask') diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 920d9ac6..9ecad993 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -5,7 +5,7 @@ from base import data_set def get_dataset_and_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" - self.dataset = data_set.create_dataset(start_vars['dataset']) + self.dataset = data_set.create_dataset(start_vars['dataset_name']) self.this_trait = GeneralTrait(dataset=self.dataset.name, name=start_vars['trait_id'], cellid=None) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 23dd1534..b82f1c59 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -374,7 +374,7 @@ class CorrelationResults(object): #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' self.tissue_probeset_freeze_id = 1 - traitList = self.correlate(self.vals) + traitList = self.correlate() _log.info("Done doing correlation calculation") @@ -823,18 +823,17 @@ Resorting this table
"""Returns the name of the reference database file with which correlations are calculated. Takes argument cursor which is a cursor object of any instance of a subclass of templatePage Used by correlationPage""" -ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name - self.cursor.execute(query) - result = self.cursor.fetchone() - Id = result[0] - FullName = result[1] - FullName = FullName.replace(' ','_') - FullName = FullName.replace('/','_') - FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt' + trait_id, full_name = g.db.execute("""SELECT Id, FullName + FROM ProbeSetFreeze + WHERE Name = '%s'""" % target_db_name).fetchone() + for char in [' ', '/']: + full_name = full_name.replace(char, '_') + + file_name = 'ProbeSetFreezeId_' + str(trait_id) + '_FullName_' + full_name + '.txt' + + return file_name - return FileName - query = 'SELECT Id, FullName F #XZ, 01/29/2009: I modified this function. @@ -1262,7 +1261,7 @@ ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name #_log.info("Using the slow method for correlation") # #_log.info("Fetching from database") - traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) + traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) #_log.info("Done fetching from database") totalTraits = len(traits) #XZ, 09/18/2008: total trait number @@ -1339,7 +1338,7 @@ ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name #cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR) # If the cache file exists, do a cached correlation for probeset data - if self.db.type == "ProbeSet": + if self.dataset.type == "ProbeSet": # if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available: # traits = do_parallel_correlation() # -- cgit 1.4.1 From 59ed965cff915b1a96c387d947af9f673512c627 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 25 Jan 2013 21:00:08 +0000 Subject: Fixed a few bugs while trying to get the code running on the new server --- wqflask/base/webqtlConfig.py | 4 ++-- wqflask/requirements.txt | 8 -------- wqflask/wqflask/correlation/correlationFunction.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 2 +- wqflask/wqflask/search_results.py | 6 ++++-- wqflask/wqflask/show_trait/SampleList.py | 2 +- 6 files changed, 9 insertions(+), 15 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 755595e0..d5f09b64 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -63,8 +63,8 @@ CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/' ChangableHtmlPath = GNROOT + 'web/' SITENAME = 'GN' -PORTADDR = "http://132.192.47.32" -BASEHREF = '' +PORTADDR = "http://50.16.251.170" +BASEHREF = '' INFOPAGEHREF = '/dbdoc/%s.html' GLOSSARYFILE = "/glossary.html" CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' diff --git a/wqflask/requirements.txt b/wqflask/requirements.txt index d775ff21..cd75ae9a 100644 --- a/wqflask/requirements.txt +++ b/wqflask/requirements.txt @@ -3,20 +3,12 @@ Jinja2==2.6 MySQL-python==1.2.3 Piddle==0.1-dev PyYAML==3.10 -Reaper==1.0 SQLAlchemy==0.7.9 Werkzeug==0.8.3 -ipython==0.13.1 logging-tree==1.1 -logilab-astng==0.24.1 -logilab-common==0.58.3 -mercurial==2.4.2 nose==1.2.1 -numarray==1.5.2 numpy==1.6.2 pp==1.6.3 -pylint==0.26.0 -readline==6.2.4.1 requests==0.13.3 rpy2==2.3.1 scipy==0.11.0 diff --git a/wqflask/wqflask/correlation/correlationFunction.py b/wqflask/wqflask/correlation/correlationFunction.py index 2c1870fe..7d4b58a9 100644 --- a/wqflask/wqflask/correlation/correlationFunction.py +++ b/wqflask/wqflask/correlation/correlationFunction.py @@ -31,7 +31,7 @@ import pp import string from utility import webqtlUtil -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from dbFunction import webqtlDatabaseFunction diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index b82f1c59..96298b37 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -46,7 +46,7 @@ import reaper from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from base import data_set from base.templatePage import templatePage from utility import webqtlUtil, helper_functions diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index d986a2e0..ae1cadd0 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function +from __future__ import absolute_import, print_function, division from wqflask import app @@ -16,6 +16,8 @@ import datetime from pprint import pformat as pf +from flask import Flask, g + # Instead of importing HT we're going to build a class below until we can eliminate it from htmlgen import HTMLgen2 as HT @@ -45,7 +47,7 @@ class SearchResultPage(templatePage): #logging_tree.printout() self.fd = fd templatePage.__init__(self, fd) - assert self.openMysql(), "Couldn't open MySQL" + #assert self.openMysql(), "Couldn't open MySQL" print("fd is:", pf(fd)) print("fd.dict is:", pf(fd['dataset'])) diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index d98a810a..1130fb60 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -4,7 +4,7 @@ from flask import Flask, g from base import webqtlCaseData from utility import webqtlUtil, Plot, Bunch -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from pprint import pformat as pf -- cgit 1.4.1 From aa1cff2d2bae08dadb6e9f8be759c4f13a974e73 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 31 Jan 2013 20:41:51 +0000 Subject: Changed fd to kw in search_results.py, commented out some lines related to fd --- wqflask/base/data_set.py | 1 - wqflask/wqflask/search_results.py | 40 ++++++++++++++++----------------------- 2 files changed, 16 insertions(+), 25 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 7088913c..8ced1528 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -41,7 +41,6 @@ from pprint import pformat as pf DS_NAME_MAP = {} def create_dataset(dataset_name): - #cursor = db_conn.cursor() print("dataset_name:", dataset_name) query = """ diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index ae1cadd0..1be1185c 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -26,7 +26,6 @@ from utility.THCell import THCell from utility.TDCell import TDCell from base.data_set import create_dataset from base.trait import GeneralTrait -from base.templatePage import templatePage from wqflask import parser from wqflask import do_search from utility import webqtlUtil @@ -37,22 +36,18 @@ from utility import formatting #from base.JinjaPage import JinjaEnv, JinjaPage -class SearchResultPage(templatePage): +class SearchResultPage(): #maxReturn = 3000 - def __init__(self, fd): + def __init__(self, kw): print("initing SearchResultPage") #import logging_tree #logging_tree.printout() - self.fd = fd - templatePage.__init__(self, fd) + #self.fd = fd + #templatePage.__init__(self, fd) #assert self.openMysql(), "Couldn't open MySQL" - print("fd is:", pf(fd)) - print("fd.dict is:", pf(fd['dataset'])) - self.dataset = fd['dataset'] - # change back to self.dataset #if not self.dataset or self.dataset == 'spacer': # #Error, No dataset selected @@ -62,20 +57,17 @@ class SearchResultPage(templatePage): ########################################### # All Phenotypes is a special case we'll deal with later - if self.dataset == "All Phenotypes": - self.cursor.execute(""" - select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze, - InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = - PublishFreeze.InbredSetId""") - results = self.cursor.fetchall() - self.dataset = map(lambda x: DataSet(x[0], self.cursor), results) - self.dataset_groups = map(lambda x: x[1], results) - self.dataset_group_ids = map(lambda x: x[2], results) - else: - print("self.dataset is:", pf(self.dataset)) - # Replaces a string with an object - self.dataset = create_dataset(self.dataset) - print("self.dataset is now:", pf(self.dataset)) + #if kw['dataset'] == "All Phenotypes": + # self.cursor.execute(""" + # select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze, + # InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = + # PublishFreeze.InbredSetId""") + # results = self.cursor.fetchall() + # self.dataset = map(lambda x: DataSet(x[0], self.cursor), results) + # self.dataset_groups = map(lambda x: x[1], results) + # self.dataset_group_ids = map(lambda x: x[2], results) + #else: + self.dataset = create_dataset(kw['dataset']) self.search() self.gen_search_result() @@ -110,7 +102,7 @@ class SearchResultPage(templatePage): def search(self): - self.search_terms = parser.parse(self.fd['search_terms']) + self.search_terms = parser.parse(self.kw['search_terms']) print("After parsing:", self.search_terms) self.results = [] -- cgit 1.4.1 From b82eb4a59edb4e1d8bbf3588edeb13e38ead052e Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 31 Jan 2013 15:08:01 -0600 Subject: Changed reference to 'dataset_name' in keywords to 'dataset' to get page to work, but will change all references to the dataset name to 'dataset_name' in future to avoid confusion between the dataset name and the actual dataset object --- wqflask/utility/helper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask') diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 9ecad993..920d9ac6 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -5,7 +5,7 @@ from base import data_set def get_dataset_and_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" - self.dataset = data_set.create_dataset(start_vars['dataset_name']) + self.dataset = data_set.create_dataset(start_vars['dataset']) self.this_trait = GeneralTrait(dataset=self.dataset.name, name=start_vars['trait_id'], cellid=None) -- cgit 1.4.1 From c8c0cfd268c5bdd2868a087c41d42051fde93d50 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 31 Jan 2013 21:48:34 +0000 Subject: Copied over nginx.conf from /etc/nginx --- wqflask/other_config/nginx-old.conf | 108 ++++++++++++++++++++++++++++++++++++ wqflask/other_config/nginx.conf | 108 ------------------------------------ 2 files changed, 108 insertions(+), 108 deletions(-) create mode 100644 wqflask/other_config/nginx-old.conf delete mode 100644 wqflask/other_config/nginx.conf (limited to 'wqflask') diff --git a/wqflask/other_config/nginx-old.conf b/wqflask/other_config/nginx-old.conf new file mode 100644 index 00000000..65ee768c --- /dev/null +++ b/wqflask/other_config/nginx-old.conf @@ -0,0 +1,108 @@ + +#user nobody; +worker_processes 1; + +#error_log logs/error.log; +#error_log logs/error.log notice; +#error_log logs/error.log info; + +#pid logs/nginx.pid; + + +events { + worker_connections 1024; +} + + +http { + include mime.types; + default_type application/octet-stream; + + #log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + # '$status $body_bytes_sent "$http_referer" ' + # '"$http_user_agent" "$http_x_forwarded_for"'; + + #access_log logs/access.log main; + + sendfile on; + #tcp_nopush on; + + #keepalive_timeout 0; + keepalive_timeout 65; + + gzip on; + + server { + # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ + listen 80; + + server_name _; + + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + + location ^~ /css/ { + root /home/sam/gene/wqflask/wqflask/static/; + } + + location ^~ /javascript/ { + root /home/sam/gene/wqflask/wqflask/static/; + } + +# location ^~ /image/ { +# root /home/sam/gene/wqflask/wqflask/static/; +# } + + location ^~ /images/ { + root /home/sam/gene/wqflask/wqflask/static/; + } + + location / { + proxy_pass http://127.0.0.1:5000/; + proxy_redirect off; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + } + + + # another virtual host using mix of IP-, name-, and port-based configuration + # + #server { + # listen 8000; + # listen somename:8080; + # server_name somename alias another.alias; + + # location / { + # root html; + # index index.html index.htm; + # } + #} + + + # HTTPS server + # + #server { + # listen 443; + # server_name localhost; + + # ssl on; + # ssl_certificate cert.pem; + # ssl_certificate_key cert.key; + + # ssl_session_timeout 5m; + + # ssl_protocols SSLv2 SSLv3 TLSv1; + # ssl_ciphers HIGH:!aNULL:!MD5; + # ssl_prefer_server_ciphers on; + + # location / { + # root html; + # index index.html index.htm; + # } + #} + +} diff --git a/wqflask/other_config/nginx.conf b/wqflask/other_config/nginx.conf deleted file mode 100644 index 65ee768c..00000000 --- a/wqflask/other_config/nginx.conf +++ /dev/null @@ -1,108 +0,0 @@ - -#user nobody; -worker_processes 1; - -#error_log logs/error.log; -#error_log logs/error.log notice; -#error_log logs/error.log info; - -#pid logs/nginx.pid; - - -events { - worker_connections 1024; -} - - -http { - include mime.types; - default_type application/octet-stream; - - #log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - # '$status $body_bytes_sent "$http_referer" ' - # '"$http_user_agent" "$http_x_forwarded_for"'; - - #access_log logs/access.log main; - - sendfile on; - #tcp_nopush on; - - #keepalive_timeout 0; - keepalive_timeout 65; - - gzip on; - - server { - # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ - listen 80; - - server_name _; - - access_log /var/log/nginx/access.log; - error_log /var/log/nginx/error.log; - - location ^~ /css/ { - root /home/sam/gene/wqflask/wqflask/static/; - } - - location ^~ /javascript/ { - root /home/sam/gene/wqflask/wqflask/static/; - } - -# location ^~ /image/ { -# root /home/sam/gene/wqflask/wqflask/static/; -# } - - location ^~ /images/ { - root /home/sam/gene/wqflask/wqflask/static/; - } - - location / { - proxy_pass http://127.0.0.1:5000/; - proxy_redirect off; - - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - - } - - - # another virtual host using mix of IP-, name-, and port-based configuration - # - #server { - # listen 8000; - # listen somename:8080; - # server_name somename alias another.alias; - - # location / { - # root html; - # index index.html index.htm; - # } - #} - - - # HTTPS server - # - #server { - # listen 443; - # server_name localhost; - - # ssl on; - # ssl_certificate cert.pem; - # ssl_certificate_key cert.key; - - # ssl_session_timeout 5m; - - # ssl_protocols SSLv2 SSLv3 TLSv1; - # ssl_ciphers HIGH:!aNULL:!MD5; - # ssl_prefer_server_ciphers on; - - # location / { - # root html; - # index index.html index.htm; - # } - #} - -} -- cgit 1.4.1 From 20efd30065bf95d7c8de9e69b0e1a75de7c47061 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 31 Jan 2013 23:59:09 +0000 Subject: Added wqflask-nginx.conf --- wqflask/other_config/wqflask-nginx.conf | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 wqflask/other_config/wqflask-nginx.conf (limited to 'wqflask') diff --git a/wqflask/other_config/wqflask-nginx.conf b/wqflask/other_config/wqflask-nginx.conf new file mode 100644 index 00000000..c5ab0e5c --- /dev/null +++ b/wqflask/other_config/wqflask-nginx.conf @@ -0,0 +1,40 @@ +server { + # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ + listen 80; + + server_name _; + + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + + location ^~ /css/ { + root /gene/wqflask/wqflask/static/; + } + + location ^~ /javascript/ { + root /gene/wqflask/wqflask/static/; + } + +# location ^~ /image/ { +# root /gene/wqflask/wqflask/static/; +# } + + location ^~ /images/ { + root /gene/wqflask/wqflask/static/; + } + + ### New - added by Sam + #location ^~ /static/ { + # root /gene/wqflask/wqflask/static/; + #} + + location / { + proxy_pass http://127.0.0.1:5000/; + proxy_redirect off; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + +} -- cgit 1.4.1 From c01928f9f2c130c8adc488ef9435c28f5542e565 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 6 Feb 2013 15:14:23 -0600 Subject: Small change to search_results.py to get search results page to work --- wqflask/wqflask/search_results.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 1be1185c..d20d7d89 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -69,6 +69,8 @@ class SearchResultPage(): #else: self.dataset = create_dataset(kw['dataset']) + self.search_terms = kw['search_terms'] + self.search() self.gen_search_result() @@ -102,7 +104,7 @@ class SearchResultPage(): def search(self): - self.search_terms = parser.parse(self.kw['search_terms']) + self.search_terms = parser.parse(self.search_terms) print("After parsing:", self.search_terms) self.results = [] -- cgit 1.4.1 From 471480255a28ae8743f4b736057a8d54585b1eca Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 6 Feb 2013 15:15:27 -0600 Subject: Added changes to example.py and prep_data.py that lets nick's code read in our data --- wqflask/wqflask/pylmm | 1 + 1 file changed, 1 insertion(+) create mode 160000 wqflask/wqflask/pylmm (limited to 'wqflask') diff --git a/wqflask/wqflask/pylmm b/wqflask/wqflask/pylmm new file mode 160000 index 00000000..cede848b --- /dev/null +++ b/wqflask/wqflask/pylmm @@ -0,0 +1 @@ +Subproject commit cede848b7ce648366c1bdd7bc5df43c633eeb0d7 -- cgit 1.4.1 From d75fc63891f617fbe8b2b030fdce80b1628c6a41 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 6 Feb 2013 18:32:30 -0600 Subject: Edited genofile_parser.py to go through all genofiles and convert them to the format used by nick's code --- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 118 ++++++++ wqflask/wqflask/my_pylmm/data/prep_data.py | 64 +++++ wqflask/wqflask/my_pylmm/example.py | 58 ++++ wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 0 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 341 +++++++++++++++++++++++ 5 files changed, 581 insertions(+) create mode 100644 wqflask/wqflask/my_pylmm/data/genofile_parser.py create mode 100644 wqflask/wqflask/my_pylmm/data/prep_data.py create mode 100644 wqflask/wqflask/my_pylmm/example.py create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/__init__.py create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm.py (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py new file mode 100644 index 00000000..1dafecc8 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -0,0 +1,118 @@ +#!/usr/bin/python + +from __future__ import print_function, division, absolute_import +import csv +import os +import glob +import traceback + +class EmptyConfigurations(Exception): pass + +class ConvertGenoFile(object): + + def __init__(self, input_file, output_file): + + self.input_file = input_file + self.output_file = output_file + + self.latest_row_pos = None + self.latest_col_pos = None + + self.latest_row_value = None + self.latest_col_value = None + + def convert(self): + + self.prefer_config = { + '@mat': "1", + '@pat': "0", + '@het': "0.5", + '@unk': "NA" + } + + self.configurations = {} + self.skipped_cols = 3 + + self.input_fh = open(self.input_file) + + + with open(self.output_file, "w") as self.output_fh: + self.process_csv() + + + + #def process_row(self, row): + # counter = 0 + # for char in row: + # if char + # counter += 1 + + def process_csv(self): + for row_count, row in enumerate(self.process_rows()): + #self.latest_row_pos = row_count + + for item_count, item in enumerate(row.split()[self.skipped_cols:]): + # print('configurations:', str(configurations)) + self.latest_col_pos = item_count + self.skipped_cols + self.latest_col_value = item + if item_count != 0: + self.output_fh.write(" ") + self.output_fh.write(self.configurations[item.upper()]) + + self.output_fh.write("\n") + + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + self.latest_row_value = row + # Take care of headers + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.skipped_cols = 4 + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.prefer_config: + self.configurations[value] = self.prefer_config[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row + + @classmethod + def process_all(cls, old_directory, new_directory): + os.chdir(old_directory) + for input_file in glob.glob("*.geno"): + group_name = input_file.split('.')[0] + output_file = os.path.join(new_directory, group_name + ".snps") + print("%s -> %s" % (input_file, output_file)) + convertob = ConvertGenoFile(input_file, output_file) + try: + convertob.convert() + except EmptyConfigurations as why: + print(" No config info? Continuing...") + #excepted = True + continue + except Exception as why: + + print(" Exception:", why) + print(traceback.print_exc()) + print(" Found in row %i at tabular column %i" % (convertob.latest_row_pos, + convertob.latest_col_pos)) + print(" Column is:", convertob.latest_col_value) + print(" Row is:", convertob.latest_row_value) + break + + +if __name__=="__main__": + Old_Geno_Directory = """/home/zas1024/gene/web/genotypes/""" + New_Geno_Directory = """/home/zas1024/gene/web/new_genotypes/""" + #Input_File = """/home/zas1024/gene/web/genotypes/BXD.geno""" + #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" + ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) + #ConvertGenoFiles(Geno_Directory) + + #process_csv(Input_File, Output_File) \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/data/prep_data.py b/wqflask/wqflask/my_pylmm/data/prep_data.py new file mode 100644 index 00000000..b7a133c2 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/data/prep_data.py @@ -0,0 +1,64 @@ +#!/usr/bin/python + +from __future__ import absolute_import, print_function, division +import numpy + + +class PrepData(object): + def __init__(self, exprs_file, snps_file): + self.exprs_file = exprs_file + self.snps_file = snps_file + self.empty_columns = set() + #self.identify_no_genotype_samples() + self.identify_empty_samples() + self.trim_files() + + def identify_empty_samples(self): + with open(self.exprs_file) as fh: + for line in fh: + for pos, item in enumerate(line.split()): + if item == "NA": + self.empty_columns.add(pos) + #print("self.empty_columns:", self.empty_columns) + nums = set(range(0, 176)) + print("not included:", nums-self.empty_columns) + + #def identify_no_genotype_samples(self): + # #for this_file in (self.exprs_file, self.snps_file): + # #with open(this_file) as fh: + # no_geno_samples = [] + # has_genotypes = False + # with open(self.snps_file) as fh: + # for line in fh: + # num_samples = len(line.split()) + # break + # for sample in range (num_samples): + # for line in fh: + # if line.split()[sample] != "NA": + # has_genotypes = True + # break + # if has_genotypes == False: + # no_geno_samples.append(sample) + # + # print(no_geno_samples) + + def trim_files(self): + for this_file in (self.exprs_file, self.snps_file): + input_file = open(this_file) + this_file_name_output = this_file + ".new" + with open(this_file_name_output, "w") as output: + for line in input_file: + data_wanted = [] + for pos, item in enumerate(line.split()): + if pos in self.empty_columns: + continue + else: + data_wanted.append("%2s" % (item)) + #print("data_wanted is", data_wanted) + output.write(" ".join(data_wanted) + "\n") + print("Done writing file:", this_file_name_output) + +if __name__=="__main__": + exprs_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.exprs.1""" + snps_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" + PrepData(exprs_file, snps_file) \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py new file mode 100644 index 00000000..0348d67b --- /dev/null +++ b/wqflask/wqflask/my_pylmm/example.py @@ -0,0 +1,58 @@ +#!/usr/bin/python + +from __future__ import absolute_import, print_function, division + +import sys +import time + +import numpy as np +from pyLMM import lmm + +from pprint import pformat as pf + + +Y = np.genfromtxt('data/mdp.exprs.1.new') +print("exprs is:", pf(Y.shape)) + +# Loading npdump and first 1000 snps for speed +#K = np.load('data/hmdp.liver.K.npdump') +#snps = np.load('data/hmdp.liver.snps.1000.npdump').T + +# These three lines will load all SNPs (from npdump or from txt) and +# calculate the kinship +snps = np.genfromtxt('data/mdp.snps.1000.new').T +print("snps is:", pf(snps.shape)) +#snps = snps[~np.isnan(snps).all(axis=1)] +#print ("snps is now:", pf(snps)) +np.savetxt("/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.trimmed", snps, fmt='%s', delimiter=' ') +#snps = np.load('data/hmdp.liver.snps.npdump').T +K = lmm.calculateKinship(snps) +#print("K is:", pf(K)) +#print("Y is:", pf(Y.shape)) + +# Instantiate a LMM object for the phentoype Y and fit the null model +L = lmm.LMM(Y,K) +L.fit() + +# Manually calculate the association at one SNP +X = snps[:,0] +X[np.isnan(X)] = X[True - np.isnan(X)].mean() # Fill missing with MAF +X = X.reshape(len(X),1) +if X.var() == 0: ts,ps = (np.nan,np.nan) +else: ts,ps = L.association(X) + +# If I want to refit the variance component +L.fit(X=X) +ts,ps = L.association(X) + +# If I want to do a genome-wide scan over the 1000 SNPs. +# This call will use REML (REML = False means use ML). +# It will also refit the variance components for each SNP. +# Setting refit = False will cause the program to fit the model once +# and hold those variance component estimates for each SNP. +begin = time.time() +TS,PS = lmm.GWAS(Y,snps,K,REML=True,refit=False) +print("TS is:", pf(TS)) +print("PS is:", pf(PS)) +end = time.time() +sys.stderr.write("Total time for 1000 SNPs: %0.3f\n" % (end- begin)) \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py new file mode 100644 index 00000000..7fe599c4 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -0,0 +1,341 @@ +# pyLMM software Copyright 2012, Nicholas A. Furlotte +# Version 0.1 + +#License Details +#--------------- + +# The program is free for academic use. Please contact Nick Furlotte +# if you are interested in using the software for +# commercial purposes. + +# The software must not be modified and distributed without prior +# permission of the author. +# Any instance of this software must retain the above copyright notice. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import absolute_import, print_function, division + +import sys +import time +import numpy as np +import numpy.linalg as linalg +from scipy import optimize +from scipy import stats +#import matplotlib.pyplot as pl +import pdb + +from pprint import pformat as pf + +def calculateKinship(W): + """ + W is an n x m matrix encoding SNP minor alleles. + """ + n = W.shape[0] + m = W.shape[1] + keep = [] + for i in range(m): + mn = W[True - np.isnan(W[:,i]),i].mean() + W[np.isnan(W[:,i]),i] = mn + vr = W[:,i].var() + if vr == 0: continue + + keep.append(i) + W[:,i] = (W[:,i] - mn) / np.sqrt(vr) + + W = W[:,keep] + K = np.dot(W,W.T) * 1.0/float(m) + return K + +def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): + """ + Performs a basic GWAS scan using the LMM. This function + uses the LMM module to assess association at each SNP and + does some simple cleanup, such as removing missing individuals + per SNP and re-computing the eigen-decomp + + Y - n x 1 phenotype vector + X - n x m SNP matrix + K - n x n kinship matrix + Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K + X0 - n x q covariate matrix + REML - use restricted maximum likelihood + refit - refit the variance component for each SNP + """ + n = X.shape[0] + m = X.shape[1] + + if X0 == None: X0 = np.ones((n,1)) + + # Remove missing values in Y and adjust associated parameters + v = np.isnan(Y) + if v.sum(): + keep = True - v + Y = Y[keep] + X = X[keep,:] + X0 = X0[keep,:] + K = K[keep,:][:,keep] + Kva = [] + Kve = [] + + L = LMM(Y,K,Kva,Kve,X0) + if not refit: L.fit() + + PS = [] + TS = [] + + for i in range(m): + x = X[:,i].reshape((n,1)) + v = np.isnan(x).reshape((-1,)) + if v.sum(): + keep = True - v + xs = x[keep,:] + if xs.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue + + Ys = Y[keep] + X0s = X0[keep,:] + Ks = K[keep,:][:,keep] + Ls = LMM(Ys,Ks,X0=X0s) + if refit: Ls.fit(X=xs) + else: Ls.fit() + ts,ps = Ls.association(xs,REML=REML) + else: + if x.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue + + if refit: L.fit(X=x) + ts,ps = L.association(x,REML=REML) + + PS.append(ps) + TS.append(ts) + + return TS,PS + +class LMM: + + """ + This is a simple version of EMMA/fastLMM. + The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) + and to optimize this model by finding the maximum-likelihood estimates for the model parameters. + There are three model parameters: heritability (h), covariate coefficients (beta) and the total + phenotypic variance (sigma). + Heritability as defined here is the proportion of the total variance (sigma) that is attributed to + the kinship matrix. + + For simplicity, we assume that everything being input is a numpy array. + If this is not the case, the module may throw an error as conversion from list to numpy array + is not done consistently. + + """ + def __init__(self,Y,K,Kva=[],Kve=[],X0=None): + + """ + The constructor takes a phenotype vector or array of size n. + It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. + If they are not provided, the constructor will calculate them. + X0 is an optional covariate matrix of size n x q, where there are q covariates. + When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ + + if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + + x = Y != -9 + if not x.sum() == len(Y): + sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) + Y = Y[x] + K = K[x,:][:,x] + X0 = X0[x,:] + Kva = [] + Kve = [] + self.nonmissing = x + + if len(Kva) == 0 or len(Kve) == 0: + sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + begin = time.time() + Kva,Kve = linalg.eigh(K) + end = time.time() + sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + self.K = K + self.Kva = Kva + self.Kve = Kve + self.Y = Y + self.X0 = X0 + self.N = self.K.shape[0] + + self.transform() + + def transform(self): + + """ + Computes a transformation on the phenotype vector and the covariate matrix. + The transformation is obtained by left multiplying each parameter by the transpose of the + eigenvector matrix of K (the kinship). + """ + + print(len(self.Kve.T)) + print(len(self.Y)) + + self.Yt = np.dot(self.Kve.T, self.Y) + self.X0t = np.dot(self.Kve.T, self.X0) + + def getMLSoln(self,h,X): + + """ + Obtains the maximum-likelihood estimates for the covariate coefficients (beta), + the total variance of the trait (sigma) and also passes intermediates that can + be utilized in other functions. The input parameter h is a value between 0 and 1 and represents + the heritability or the proportion of the total variance attributed to genetics. The X is the + covariate matrix. + """ + + #print("h is", pf(h)) + #print("X is", pf(X)) + print("X.shape is", pf(X.shape)) + + S = 1.0/(h*self.Kva + (1.0 - h)) + Xt = X.T*S + XX = np.dot(Xt,X) + + + XX_i = linalg.inv(XX) + beta = np.dot(np.dot(XX_i,Xt),self.Yt) + Yt = self.Yt - np.dot(X,beta) + Q = np.dot(Yt.T*S,Yt) + sigma = Q * 1.0 / (float(len(self.Yt)) - float(X.shape[1])) + return beta,sigma,Q,XX_i,XX + + def LL_brent(self,h,X=None,REML=False): return -self.LL(h,X,stack=False,REML=REML)[0] + def LL(self,h,X=None,stack=True,REML=False): + + """ + Computes the log-likelihood for a given heritability (h). If X==None, then the + default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with + the input X. If stack is false, then X is used in place of X0t in the LL calculation. + REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. + """ + + if X == None: X = self.X0t + elif stack: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) + + n = float(self.N) + q = float(X.shape[1]) + beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) + LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) + LL = -0.5 * LL + + if REML: + LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(np.dot(X.T,X))) - np.log(linalg.det(XX)) + LL = LL + 0.5*LL_REML_part + + return LL,beta,sigma,XX_i + + def getMax(self,H, X=None,REML=False): + + """ + Helper functions for .fit(...). + This function takes a set of LLs computed over a grid and finds possible regions + containing a maximum. Within these regions, a Brent search is performed to find the + optimum. + + """ + n = len(self.LLs) + HOpt = [] + for i in range(1,n-2): + if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + + if len(HOpt) > 1: + sys.stderr.write("ERR: Found multiple maximum. Returning first...\n") + return HOpt[0] + elif len(HOpt) == 1: return HOpt[0] + elif self.LLs[0] > self.LLs[n-1]: return H[0] + else: return H[n-1] + + def fit(self,X=None,ngrids=100,REML=True): + + """ + Finds the maximum-likelihood solution for the heritability (h) given the current parameters. + X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as + the covariate matrix. + + This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. + Given this optimum, the function computes the LL and associated ML solutions. + """ + + if X == None: X = self.X0t + else: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) + H = np.array(range(ngrids)) / float(ngrids) + L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) + self.LLs = L + + hmax = self.getMax(H,X,REML) + L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) + + self.H = H + self.optH = hmax + self.optLL = L + self.optBeta = beta + self.optSigma = sigma + + return hmax,beta,sigma,L + + + def association(self,X, h = None, stack=True,REML=True): + + """ + Calculates association statitics for the SNPs encoded in the vector X of size n. + If h == None, the optimal h stored in optH is used. + + """ + if stack: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) + if h == None: h = self.optH + + L,beta,sigma,betaSTDERR = self.LL(h,X,stack=False,REML=REML) + q = len(beta) + ts,ps = self.tstat(beta[q-1],betaSTDERR[q-1,q-1],sigma,q) + return ts,ps + + def tstat(self,beta,stderr,sigma,q): + + """ + Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. + This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. + """ + + ts = beta / np.sqrt(stderr * sigma) + ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) + return ts,ps + + def plotFit(self,color='b-',title=''): + + """ + Simple function to visualize the likelihood space. It takes the LLs + calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. + The resulting "probabilities" are normalized to one and plotted against heritability. + This can be seen as an approximation to the posterior distribuiton of heritability. + + For diagnostic purposes this lets you see if there is one distinct maximum or multiple + and what the variance of the parameter looks like. + """ + mx = self.LLs.max() + p = np.exp(self.LLs - mx) + p = p/p.sum() + + pl.plot(self.H,p,color) + pl.xlabel("Heritability") + pl.ylabel("Probability of data") + pl.title(title) -- cgit 1.4.1 From 9b0264bf13e994298de95a4e08198336b6c97a38 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 7 Feb 2013 17:58:34 -0600 Subject: Added code to marker_regression.py that creates the numpy arrays to pass to Nick's code and changed the prep_data.py code to operate on a list of phenotype values instead of a textfile with the values delimited --- wqflask/base/webqtlConfig.py | 3 +- wqflask/base/webqtlConfigLocal.py | 2 +- .../wqflask/marker_regression/marker_regression.py | 42 +++++++++--- wqflask/wqflask/my_pylmm/data/prep_data.py | 74 +++++++++++++--------- wqflask/wqflask/my_pylmm/example.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 21 +++++- wqflask/wqflask/show_trait/show_trait.py | 2 +- wqflask/wqflask/views.py | 6 +- 8 files changed, 103 insertions(+), 49 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index d5f09b64..d05fa6e0 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -55,8 +55,9 @@ HTMLPATH = GNROOT + 'web/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' -TMPDIR = '/tmp/' +TMPDIR = HTMLPATH + 'tmp/' GENODIR = HTMLPATH + 'genotypes/' +NEWGENODIR = HTMLPATH + 'new_genotypes/' GENO_ARCHIVE_DIR = GENODIR + 'archive/' TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/' CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/' diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py index 84686234..8e3e0bbe 100755 --- a/wqflask/base/webqtlConfigLocal.py +++ b/wqflask/base/webqtlConfigLocal.py @@ -12,7 +12,7 @@ DB_UPDNAME = 'db_webqtl_zas1024' DB_UPDUSER = 'webqtl' DB_UPDPASSWD = 'webqtl' -GNROOT = '/home/zas1024/gn/' +GNROOT = '/home/zas1024/gene/' ROOT_URL = 'http://alexandria.uthsc.edu:91/' PythonPath = '/usr/bin/python' PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 7cdc350f..92270eb2 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -15,6 +15,8 @@ import os import httplib import urllib +import numpy as np + from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil @@ -25,6 +27,8 @@ from utility import webqtlUtil, helper_functions from base import webqtlConfig from dbFunction import webqtlDatabaseFunction from base.GeneralObject import GeneralObject +from wqflask.my_pylmm.data import prep_data +from wqflask.my_pylmm.pyLMM import lmm import reaper import cPickle @@ -63,22 +67,24 @@ class MarkerRegression(object): self.samples = [] # Want only ones with values self.vals = [] - self.variances = [] + #self.variances = [] assert start_vars['display_all_lrs'] in ('True', 'False') self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False for sample in self.dataset.group.samplelist: value = start_vars['value:' + sample] - variance = start_vars['variance:' + sample] - if variance.strip().lower() == 'x': - variance = 0 - else: - variance = float(variance) - if value.strip().lower() != 'x': - self.samples.append(str(sample)) - self.vals.append(float(value)) - self.variances.append(variance) + #variance = start_vars['variance:' + sample] + #if variance.strip().lower() == 'x': + # variance = 0 + #else: + # variance = float(variance) + #if value.strip().lower() != 'x': + self.samples.append(str(sample)) + self.vals.append(value) + #self.variances.append(variance) + + #self.initializeParameters(start_vars) @@ -447,6 +453,22 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" + prep_data.PrepData(self.vals, self.dataset.group.name) + + pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) + genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, + self.dataset.group.name + '.snps.new')) + + print("genotypes is:", pf(genotypes)) + + kinship_matrix = lmm.calculateKinship(genotypes) + print("kinship_matrix is:", pf(kinship_matrix)) + print("pheno_vector is:", pf(pheno_vector)) + + lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) + lmm_ob.fit() + + #calculate QTL for each trait self.qtl_results = self.genotype.regression(strains = self.samples, trait = self.vals) diff --git a/wqflask/wqflask/my_pylmm/data/prep_data.py b/wqflask/wqflask/my_pylmm/data/prep_data.py index b7a133c2..ef42a297 100644 --- a/wqflask/wqflask/my_pylmm/data/prep_data.py +++ b/wqflask/wqflask/my_pylmm/data/prep_data.py @@ -1,27 +1,29 @@ #!/usr/bin/python from __future__ import absolute_import, print_function, division +import os + import numpy - +from base import webqtlConfig + + class PrepData(object): - def __init__(self, exprs_file, snps_file): - self.exprs_file = exprs_file - self.snps_file = snps_file - self.empty_columns = set() + def __init__(self, pheno_vector, group_name): + self.pheno_vector = pheno_vector + self.group_name = group_name + self.no_val_samples = set() #self.identify_no_genotype_samples() self.identify_empty_samples() self.trim_files() def identify_empty_samples(self): - with open(self.exprs_file) as fh: - for line in fh: - for pos, item in enumerate(line.split()): - if item == "NA": - self.empty_columns.add(pos) - #print("self.empty_columns:", self.empty_columns) - nums = set(range(0, 176)) - print("not included:", nums-self.empty_columns) + for sample_count, val in enumerate(self.pheno_vector): + if val == "x": + self.no_val_samples.add(sample_count) + print("self.no_val_samples:", self.no_val_samples) + #nums = set(range(0, 176)) + #print("not included:", nums-self.empty_columns) #def identify_no_genotype_samples(self): # #for this_file in (self.exprs_file, self.snps_file): @@ -43,22 +45,36 @@ class PrepData(object): # print(no_geno_samples) def trim_files(self): - for this_file in (self.exprs_file, self.snps_file): - input_file = open(this_file) - this_file_name_output = this_file + ".new" - with open(this_file_name_output, "w") as output: - for line in input_file: - data_wanted = [] - for pos, item in enumerate(line.split()): - if pos in self.empty_columns: - continue - else: - data_wanted.append("%2s" % (item)) - #print("data_wanted is", data_wanted) - output.write(" ".join(data_wanted) + "\n") - print("Done writing file:", this_file_name_output) + input_file = open(os.path.join(webqtlConfig.NEWGENODIR, self.group_name+'.snps')) + output_file = os.path.join(webqtlConfig.TMPDIR, self.group_name + '.snps.new') + with open(output_file, "w") as output_file: + for line in input_file: + data_to_write = [] + for pos, item in enumerate(line.split()): + if pos in self.no_val_samples: + continue + else: + data_to_write.append("%s" % (item)) + output_file.write(" ".join(data_to_write) + "\n") + + print("Done writing:", output_file) + + #for this_file in (self.exprs_file, self.genotype_file): + # input_file = open(this_file) + # this_file_name_output = this_file + ".new" + # with open(this_file_name_output, "w") as output_file: + # for line in input_file: + # data_wanted = [] + # for pos, item in enumerate(line.split()): + # if pos in self.empty_columns: + # continue + # else: + # data_wanted.append("%2s" % (item)) + # #print("data_wanted is", data_wanted) + # output_file.write(" ".join(data_wanted) + "\n") + # print("Done writing file:", this_file_name_output) if __name__=="__main__": exprs_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.exprs.1""" - snps_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" - PrepData(exprs_file, snps_file) \ No newline at end of file + genotype_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" + PrepData(pheno_vector, genotype_file) \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py index 0348d67b..8b30debd 100644 --- a/wqflask/wqflask/my_pylmm/example.py +++ b/wqflask/wqflask/my_pylmm/example.py @@ -20,7 +20,7 @@ print("exprs is:", pf(Y.shape)) # These three lines will load all SNPs (from npdump or from txt) and # calculate the kinship -snps = np.genfromtxt('data/mdp.snps.1000.new').T +snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T print("snps is:", pf(snps.shape)) #snps = snps[~np.isnan(snps).all(axis=1)] #print ("snps is now:", pf(snps)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 7fe599c4..1ae663d4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -142,20 +142,35 @@ class LMM: is not done consistently. """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None): - + def __init__(self, Y, K, Kva=None, Kve=None, X0=None): """ The constructor takes a phenotype vector or array of size n. It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. If they are not provided, the constructor will calculate them. X0 is an optional covariate matrix of size n x q, where there are q covariates. When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if Kva is None: + Kva = [] + if Kve is None: + Kve = [] + + + if X0 == None: + X0 = np.ones(len(Y)).reshape(len(Y),1) + print("Y is:", pf(Y)) + + for key, value in locals().iteritems(): + print(" %s - %s" % (key, type(value))) + x = Y != -9 + print("x is:", pf(x)) if not x.sum() == len(Y): + print("x.sum is:", pf(x.sum())) + print("len(Y) is:", pf(len(Y))) sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) Y = Y[x] K = K[x,:][:,x] diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 603c40f5..33ea6e86 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -130,7 +130,7 @@ class ShowTrait(object): js_data = dict(sample_group_types = self.sample_group_types, sample_lists = sample_lists, attribute_names = self.sample_groups[0].attributes) - print("js_data:", pf(js_data)) + #print("js_data:", pf(js_data)) self.js_data = js_data diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 472548f0..81777742 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -136,15 +136,15 @@ def show_trait_page(): #fd = webqtlFormData.webqtlFormData(request.args) #print("stp y1:", pf(vars(fd))) template_vars = show_trait.ShowTrait(request.args) - print("js_data before dump:", template_vars.js_data) + #print("js_data before dump:", template_vars.js_data) template_vars.js_data = json.dumps(template_vars.js_data, default=json_default_handler, indent=" ") # Sorting the keys messes up the ordered dictionary, so don't do that #sort_keys=True) - print("js_data after dump:", template_vars.js_data) - print("show_trait template_vars:", pf(template_vars.__dict__)) + #print("js_data after dump:", template_vars.js_data) + #print("show_trait template_vars:", pf(template_vars.__dict__)) return render_template("show_trait.html", **template_vars.__dict__) @app.route("/marker_regression", methods=('POST',)) -- cgit 1.4.1 From bf6e7bd8e473a10d80044fa6bf778b261d5ee6ff Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 8 Feb 2013 17:47:58 -0600 Subject: Converted .geno files to json files and wrote code in marker_regression.py that loads the json files, converts them into the relevant numpy arrays, and passes them into Nick's code (which is returning results that may or may not be correct, but is at least running) --- .../wqflask/marker_regression/marker_regression.py | 61 +- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 85 ++- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 663 +++++++++++---------- 3 files changed, 462 insertions(+), 347 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 92270eb2..13ec4280 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -17,6 +17,8 @@ import urllib import numpy as np +import json + from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil @@ -453,21 +455,43 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - prep_data.PrepData(self.vals, self.dataset.group.name) + json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) + markers = json.load(json_data) + genotype_data = [marker['genotypes'] for marker in markers] + + no_val_samples = self.identify_empty_samples() + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + #print("trimmed genotype data is:", pf(trimmed_genotype_data)) + + #for marker_object in genotype_data: + # print("marker_object:", pf(marker_object)) + + + #prep_data.PrepData(self.vals, genotype_data) pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, - self.dataset.group.name + '.snps.new')) + genotypes = np.array(trimmed_genotype_data).T + print("genotypes is", pf(genotypes)) + #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, + # self.dataset.group.name + '.snps.new')).T - print("genotypes is:", pf(genotypes)) + print("pheno_vector is:", pf(pheno_vector.shape)) + print("genotypes is:", pf(genotypes.shape)) kinship_matrix = lmm.calculateKinship(genotypes) print("kinship_matrix is:", pf(kinship_matrix)) - print("pheno_vector is:", pf(pheno_vector)) lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) lmm_ob.fit() - + + t_stats, p_values = lmm.GWAS(pheno_vector, + genotypes, + kinship_matrix, + REML=True, + refit=False) + + print("p_values is:", pf(len(p_values))) #calculate QTL for each trait self.qtl_results = self.genotype.regression(strains = self.samples, @@ -633,6 +657,31 @@ class MarkerRegression(object): #return rv,tblobj,bottomInfo + def identify_empty_samples(self): + no_val_samples = [] + for sample_count, val in enumerate(self.vals): + if val == "x": + no_val_samples.append(sample_count) + return no_val_samples + #print("self.no_val_samples:", self.no_val_samples) + #nums = set(range(0, 176)) + #print("not included:", nums-self.empty_columns) + + def trim_genotypes(self, genotype_data, no_value_samples): + trimmed_genotype_data = [] + for marker in genotype_data: + new_genotypes = [] + for item_count, genotype in enumerate(marker): + if item_count in no_value_samples: + continue + try: + genotype = float(genotype) + except ValueError: + pass + new_genotypes.append(genotype) + trimmed_genotype_data.append(new_genotypes) + return trimmed_genotype_data + def plotIntMappingForPLINK(self, fd, canvas, offset= (80, 120, 20, 80), zoom = 1, startMb = None, endMb = None, showLocusForm = "",plinkResultDict={}): #calculating margins xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index 1dafecc8..ec8c521c 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -1,13 +1,31 @@ #!/usr/bin/python from __future__ import print_function, division, absolute_import -import csv +import sys +sys.path.append("..") import os import glob import traceback +import numpy as np +from pyLMM import lmm + +import simplejson as json + +from pprint import pformat as pf + class EmptyConfigurations(Exception): pass + + +class Marker(object): + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + class ConvertGenoFile(object): def __init__(self, input_file, output_file): @@ -15,6 +33,9 @@ class ConvertGenoFile(object): self.input_file = input_file self.output_file = output_file + self.mb_exists = False + self.markers = [] + self.latest_row_pos = None self.latest_col_pos = None @@ -23,7 +44,7 @@ class ConvertGenoFile(object): def convert(self): - self.prefer_config = { + self.haplotype_notation = { '@mat': "1", '@pat': "0", '@het': "0.5", @@ -31,36 +52,56 @@ class ConvertGenoFile(object): } self.configurations = {} - self.skipped_cols = 3 + #self.skipped_cols = 3 self.input_fh = open(self.input_file) - with open(self.output_file, "w") as self.output_fh: self.process_csv() - - - + + #def process_row(self, row): # counter = 0 # for char in row: # if char # counter += 1 - + def process_csv(self): for row_count, row in enumerate(self.process_rows()): #self.latest_row_pos = row_count - for item_count, item in enumerate(row.split()[self.skipped_cols:]): + row_items = row.split() + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + this_marker.cM = row_items[2] + if self.mb_exists: + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + else: + genotypes = row_items[3:] + for item_count, genotype in enumerate(genotypes): + this_marker.genotypes.append(self.configurations[genotype.upper()]) + + #print("this_marker is:", pf(this_marker.__dict__)) + + self.markers.append(this_marker.__dict__) + + with open(self.output_file, 'w') as fh: + json.dump(self.markers, fh, indent=" ", sort_keys=True) + # print('configurations:', str(configurations)) - self.latest_col_pos = item_count + self.skipped_cols - self.latest_col_value = item - if item_count != 0: - self.output_fh.write(" ") - self.output_fh.write(self.configurations[item.upper()]) + #self.latest_col_pos = item_count + self.skipped_cols + #self.latest_col_value = item + + #if item_count != 0: + # self.output_fh.write(" ") + #self.output_fh.write(self.configurations[item.upper()]) - self.output_fh.write("\n") - + #self.output_fh.write("\n") + + def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): self.latest_row_value = row @@ -69,14 +110,14 @@ class ConvertGenoFile(object): continue if row.startswith('Chr'): if 'Mb' in row.split(): - self.skipped_cols = 4 + self.mb_exists = True continue if row.startswith('@'): key, _separater, value = row.partition(':') key = key.strip() value = value.strip() - if key in self.prefer_config: - self.configurations[value] = self.prefer_config[key] + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] continue if not len(self.configurations): raise EmptyConfigurations @@ -87,17 +128,17 @@ class ConvertGenoFile(object): os.chdir(old_directory) for input_file in glob.glob("*.geno"): group_name = input_file.split('.')[0] - output_file = os.path.join(new_directory, group_name + ".snps") + output_file = os.path.join(new_directory, group_name + ".json") print("%s -> %s" % (input_file, output_file)) convertob = ConvertGenoFile(input_file, output_file) try: - convertob.convert() + convertob.convert() except EmptyConfigurations as why: print(" No config info? Continuing...") #excepted = True continue except Exception as why: - + print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %i at tabular column %i" % (convertob.latest_row_pos, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 1ae663d4..015c2e14 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -1,109 +1,128 @@ -# pyLMM software Copyright 2012, Nicholas A. Furlotte -# Version 0.1 - -#License Details -#--------------- - -# The program is free for academic use. Please contact Nick Furlotte -# if you are interested in using the software for -# commercial purposes. - -# The software must not be modified and distributed without prior -# permission of the author. -# Any instance of this software must retain the above copyright notice. - -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# pylmm is a python-based linear mixed-model solver with applications to GWAS + +# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . from __future__ import absolute_import, print_function, division import sys import time import numpy as np -import numpy.linalg as linalg +from scipy import linalg from scipy import optimize from scipy import stats -#import matplotlib.pyplot as pl import pdb from pprint import pformat as pf +#np.seterr('raise') + +def matrixMult(A,B): + #return np.dot(A,B) + + print("A is:", pf(A.shape)) + print("B is:", pf(B.shape)) + + # If the matrices are in Fortran order then the computations will be faster + # when using dgemm. Otherwise, the function will copy the matrix and that takes time. + if not A.flags['F_CONTIGUOUS']: + AA = A.T + transA = True + else: + AA = A + transA = False + + if not B.flags['F_CONTIGUOUS']: + BB = B.T + transB = True + else: + BB = B + transB = False + + return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) + def calculateKinship(W): - """ - W is an n x m matrix encoding SNP minor alleles. - """ - n = W.shape[0] - m = W.shape[1] - keep = [] - for i in range(m): - mn = W[True - np.isnan(W[:,i]),i].mean() - W[np.isnan(W[:,i]),i] = mn - vr = W[:,i].var() - if vr == 0: continue - - keep.append(i) - W[:,i] = (W[:,i] - mn) / np.sqrt(vr) - - W = W[:,keep] - K = np.dot(W,W.T) * 1.0/float(m) - return K + """ + W is an n x m matrix encoding SNP minor alleles. + + This function takes a matrix oF SNPs, imputes missing values with the maf, + normalizes the resulting vectors and returns the RRM matrix. + """ + n = W.shape[0] + m = W.shape[1] + keep = [] + for i in range(m): + mn = W[True - np.isnan(W[:,i]),i].mean() + W[np.isnan(W[:,i]),i] = mn + vr = W[:,i].var() + if vr == 0: continue + + keep.append(i) + W[:,i] = (W[:,i] - mn) / np.sqrt(vr) + + W = W[:,keep] + K = matrixMult(W,W.T) * 1.0/float(m) + return K def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): - """ - Performs a basic GWAS scan using the LMM. This function - uses the LMM module to assess association at each SNP and - does some simple cleanup, such as removing missing individuals - per SNP and re-computing the eigen-decomp - - Y - n x 1 phenotype vector - X - n x m SNP matrix - K - n x n kinship matrix - Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K - X0 - n x q covariate matrix - REML - use restricted maximum likelihood - refit - refit the variance component for each SNP - """ - n = X.shape[0] - m = X.shape[1] - - if X0 == None: X0 = np.ones((n,1)) - - # Remove missing values in Y and adjust associated parameters - v = np.isnan(Y) - if v.sum(): - keep = True - v - Y = Y[keep] - X = X[keep,:] - X0 = X0[keep,:] - K = K[keep,:][:,keep] - Kva = [] - Kve = [] - - L = LMM(Y,K,Kva,Kve,X0) - if not refit: L.fit() - - PS = [] - TS = [] - - for i in range(m): - x = X[:,i].reshape((n,1)) - v = np.isnan(x).reshape((-1,)) - if v.sum(): + """ + Performs a basic GWAS scan using the LMM. This function + uses the LMM module to assess association at each SNP and + does some simple cleanup, such as removing missing individuals + per SNP and re-computing the eigen-decomp + + Y - n x 1 phenotype vector + X - n x m SNP matrix + K - n x n kinship matrix + Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K + X0 - n x q covariate matrix + REML - use restricted maximum likelihood + refit - refit the variance component for each SNP + """ + n = X.shape[0] + m = X.shape[1] + + if X0 == None: X0 = np.ones((n,1)) + + # Remove missing values in Y and adjust associated parameters + v = np.isnan(Y) + if v.sum(): + keep = True - v + Y = Y[keep] + X = X[keep,:] + X0 = X0[keep,:] + K = K[keep,:][:,keep] + Kva = [] + Kve = [] + + L = LMM(Y,K,Kva,Kve,X0) + if not refit: L.fit() + + PS = [] + TS = [] + + for i in range(m): + x = X[:,i].reshape((n,1)) + v = np.isnan(x).reshape((-1,)) + if v.sum(): keep = True - v xs = x[keep,:] - if xs.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue + if xs.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue Ys = Y[keep] X0s = X0[keep,:] @@ -112,245 +131,251 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): if refit: Ls.fit(X=xs) else: Ls.fit() ts,ps = Ls.association(xs,REML=REML) - else: - if x.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue + else: + if x.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue if refit: L.fit(X=x) ts,ps = L.association(x,REML=REML) + + PS.append(ps) + TS.append(ts) - PS.append(ps) - TS.append(ts) - - return TS,PS + return TS,PS class LMM: - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. - - """ - def __init__(self, Y, K, Kva=None, Kve=None, X0=None): - """ - The constructor takes a phenotype vector or array of size n. - It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. - - """ - - if Kva is None: - Kva = [] - if Kve is None: - Kve = [] - - - if X0 == None: - X0 = np.ones(len(Y)).reshape(len(Y),1) - - print("Y is:", pf(Y)) - - for key, value in locals().iteritems(): - print(" %s - %s" % (key, type(value))) - - x = Y != -9 - print("x is:", pf(x)) - if not x.sum() == len(Y): - print("x.sum is:", pf(x.sum())) - print("len(Y) is:", pf(len(Y))) - sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) - Y = Y[x] - K = K[x,:][:,x] - X0 = X0[x,:] - Kva = [] - Kve = [] - self.nonmissing = x - - if len(Kva) == 0 or len(Kve) == 0: - sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - Kva,Kve = linalg.eigh(K) - end = time.time() - sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - self.K = K - self.Kva = Kva - self.Kve = Kve - self.Y = Y - self.X0 = X0 - self.N = self.K.shape[0] - - self.transform() - - def transform(self): + """ + This is a simple version of EMMA/fastLMM. + The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) + and to optimize this model by finding the maximum-likelihood estimates for the model parameters. + There are three model parameters: heritability (h), covariate coefficients (beta) and the total + phenotypic variance (sigma). + Heritability as defined here is the proportion of the total variance (sigma) that is attributed to + the kinship matrix. + + For simplicity, we assume that everything being input is a numpy array. + If this is not the case, the module may throw an error as conversion from list to numpy array + is not done consistently. + + """ + def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): + + """ + The constructor takes a phenotype vector or array of size n. + It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. + If they are not provided, the constructor will calculate them. + X0 is an optional covariate matrix of size n x q, where there are q covariates. + When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ + + if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + self.verbose = verbose + + #x = Y != -9 + x = True - np.isnan(Y) + if not x.sum() == len(Y): + if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) + Y = Y[x] + K = K[x,:][:,x] + X0 = X0[x,:] + Kva = [] + Kve = [] + self.nonmissing = x + + if len(Kva) == 0 or len(Kve) == 0: + if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + begin = time.time() + Kva,Kve = linalg.eigh(K) + end = time.time() + if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + + self.K = K + self.Kva = Kva + self.Kve = Kve + self.Y = Y + self.X0 = X0 + self.N = self.K.shape[0] + + if sum(self.Kva < 1e-6): + if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) + self.Kva[self.Kva < 1e-6] = 1e-6 + + self.transform() + + def transform(self): """ Computes a transformation on the phenotype vector and the covariate matrix. - The transformation is obtained by left multiplying each parameter by the transpose of the + The transformation is obtained by left multiplying each parameter by the transpose of the eigenvector matrix of K (the kinship). """ - - print(len(self.Kve.T)) - print(len(self.Y)) - - self.Yt = np.dot(self.Kve.T, self.Y) - self.X0t = np.dot(self.Kve.T, self.X0) - - def getMLSoln(self,h,X): - - """ - Obtains the maximum-likelihood estimates for the covariate coefficients (beta), - the total variance of the trait (sigma) and also passes intermediates that can - be utilized in other functions. The input parameter h is a value between 0 and 1 and represents - the heritability or the proportion of the total variance attributed to genetics. The X is the - covariate matrix. - """ - - #print("h is", pf(h)) - #print("X is", pf(X)) - print("X.shape is", pf(X.shape)) - - S = 1.0/(h*self.Kva + (1.0 - h)) - Xt = X.T*S - XX = np.dot(Xt,X) - - - XX_i = linalg.inv(XX) - beta = np.dot(np.dot(XX_i,Xt),self.Yt) - Yt = self.Yt - np.dot(X,beta) - Q = np.dot(Yt.T*S,Yt) - sigma = Q * 1.0 / (float(len(self.Yt)) - float(X.shape[1])) - return beta,sigma,Q,XX_i,XX - - def LL_brent(self,h,X=None,REML=False): return -self.LL(h,X,stack=False,REML=REML)[0] - def LL(self,h,X=None,stack=True,REML=False): - - """ - Computes the log-likelihood for a given heritability (h). If X==None, then the - default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with - the input X. If stack is false, then X is used in place of X0t in the LL calculation. - REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. - """ - - if X == None: X = self.X0t - elif stack: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) - - n = float(self.N) - q = float(X.shape[1]) - beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) - LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) - LL = -0.5 * LL - - if REML: - LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(np.dot(X.T,X))) - np.log(linalg.det(XX)) - LL = LL + 0.5*LL_REML_part - - return LL,beta,sigma,XX_i - - def getMax(self,H, X=None,REML=False): - - """ - Helper functions for .fit(...). - This function takes a set of LLs computed over a grid and finds possible regions - containing a maximum. Within these regions, a Brent search is performed to find the - optimum. - - """ - n = len(self.LLs) - HOpt = [] - for i in range(1,n-2): - if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - - if len(HOpt) > 1: - sys.stderr.write("ERR: Found multiple maximum. Returning first...\n") - return HOpt[0] - elif len(HOpt) == 1: return HOpt[0] - elif self.LLs[0] > self.LLs[n-1]: return H[0] - else: return H[n-1] - - def fit(self,X=None,ngrids=100,REML=True): - - """ - Finds the maximum-likelihood solution for the heritability (h) given the current parameters. - X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as - the covariate matrix. - - This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. - Given this optimum, the function computes the LL and associated ML solutions. - """ - - if X == None: X = self.X0t - else: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) - H = np.array(range(ngrids)) / float(ngrids) - L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) - self.LLs = L - - hmax = self.getMax(H,X,REML) - L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) - - self.H = H - self.optH = hmax - self.optLL = L - self.optBeta = beta - self.optSigma = sigma - - return hmax,beta,sigma,L - - - def association(self,X, h = None, stack=True,REML=True): - - """ - Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. - - """ - if stack: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) - if h == None: h = self.optH - - L,beta,sigma,betaSTDERR = self.LL(h,X,stack=False,REML=REML) - q = len(beta) - ts,ps = self.tstat(beta[q-1],betaSTDERR[q-1,q-1],sigma,q) - return ts,ps - - def tstat(self,beta,stderr,sigma,q): - - """ - Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. - This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. - """ - - ts = beta / np.sqrt(stderr * sigma) - ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) - return ts,ps - - def plotFit(self,color='b-',title=''): - - """ - Simple function to visualize the likelihood space. It takes the LLs - calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. - The resulting "probabilities" are normalized to one and plotted against heritability. - This can be seen as an approximation to the posterior distribuiton of heritability. - - For diagnostic purposes this lets you see if there is one distinct maximum or multiple - and what the variance of the parameter looks like. - """ - mx = self.LLs.max() - p = np.exp(self.LLs - mx) - p = p/p.sum() - - pl.plot(self.H,p,color) - pl.xlabel("Heritability") - pl.ylabel("Probability of data") - pl.title(title) + + self.Yt = matrixMult(self.Kve.T, self.Y) + self.X0t = matrixMult(self.Kve.T, self.X0) + self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) + self.q = self.X0t.shape[1] + + def getMLSoln(self,h,X): + + """ + Obtains the maximum-likelihood estimates for the covariate coefficients (beta), + the total variance of the trait (sigma) and also passes intermediates that can + be utilized in other functions. The input parameter h is a value between 0 and 1 and represents + the heritability or the proportion of the total variance attributed to genetics. The X is the + covariate matrix. + """ + + S = 1.0/(h*self.Kva + (1.0 - h)) + Xt = X.T*S + XX = matrixMult(Xt,X) + XX_i = linalg.inv(XX) + beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) + Yt = self.Yt - matrixMult(X,beta) + Q = np.dot(Yt.T*S,Yt) + sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) + return beta,sigma,Q,XX_i,XX + + def LL_brent(self,h,X=None,REML=False): + #brent will not be bounded by the specified bracket. + # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. + if h < 0: return 1e6 + return -self.LL(h,X,stack=False,REML=REML)[0] + + def LL(self,h,X=None,stack=True,REML=False): + + """ + Computes the log-likelihood for a given heritability (h). If X==None, then the + default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with + the input X. If stack is false, then X is used in place of X0t in the LL calculation. + REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. + """ + + if X == None: X = self.X0t + elif stack: + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + n = float(self.N) + q = float(X.shape[1]) + beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) + LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) + LL = -0.5 * LL + + if REML: + LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) + LL = LL + 0.5*LL_REML_part + + return LL,beta,sigma,XX_i + + def getMax(self,H, X=None,REML=False): + + """ + Helper functions for .fit(...). + This function takes a set of LLs computed over a grid and finds possible regions + containing a maximum. Within these regions, a Brent search is performed to find the + optimum. + + """ + n = len(self.LLs) + HOpt = [] + for i in range(1,n-2): + if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: + HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] + + if len(HOpt) > 1: + if self.verbose: sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") + return HOpt[0] + elif len(HOpt) == 1: return HOpt[0] + elif self.LLs[0] > self.LLs[n-1]: return H[0] + else: return H[n-1] + + def fit(self,X=None,ngrids=100,REML=True): + + """ + Finds the maximum-likelihood solution for the heritability (h) given the current parameters. + X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as + the covariate matrix. + + This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. + Given this optimum, the function computes the LL and associated ML solutions. + """ + + if X == None: X = self.X0t + else: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + H = np.array(range(ngrids)) / float(ngrids) + L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) + self.LLs = L + + hmax = self.getMax(H,X,REML) + L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) + + self.H = H + self.optH = hmax + self.optLL = L + self.optBeta = beta + self.optSigma = sigma + + return hmax,beta,sigma,L + + def association(self,X, h = None, stack=True,REML=True, returnBeta=False): + + """ + Calculates association statitics for the SNPs encoded in the vector X of size n. + If h == None, the optimal h stored in optH is used. + + """ + if stack: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + if h == None: h = self.optH + + L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) + q = len(beta) + ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) + + if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma + return ts,ps + + def tstat(self,beta,var,sigma,q): + + """ + Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. + This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. + """ + + ts = beta / np.sqrt(var * sigma) + ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) + if not len(ts) == 1 or not len(ps) == 1: raise Exception("Something bad happened :(") + return ts.sum(),ps.sum() + + def plotFit(self,color='b-',title=''): + + """ + Simple function to visualize the likelihood space. It takes the LLs + calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. + The resulting "probabilities" are normalized to one and plotted against heritability. + This can be seen as an approximation to the posterior distribuiton of heritability. + + For diagnostic purposes this lets you see if there is one distinct maximum or multiple + and what the variance of the parameter looks like. + """ + import matplotlib.pyplot as pl + + mx = self.LLs.max() + p = np.exp(self.LLs - mx) + p = p/p.sum() + + pl.plot(self.H,p,color) + pl.xlabel("Heritability") + pl.ylabel("Probability of data") + pl.title(title) \ No newline at end of file -- cgit 1.4.1 From b3853925653cf6145d7fb56b71edfc824a2d051a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 12 Feb 2013 16:20:56 -0600 Subject: Edited marker_regression.py and data_set.py to store the p-values and their corresponding markers to be used in the table of qtl results and other figures --- wqflask/base/data_set.py | 29 +++++++- .../wqflask/marker_regression/marker_regression.py | 80 +++++++++++++--------- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 15 +++- 3 files changed, 89 insertions(+), 35 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 8ced1528..182e15e6 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -23,6 +23,8 @@ from __future__ import absolute_import, print_function, division import os +import json + from flask import Flask, g from htmlgen import HTMLgen2 as HT @@ -64,6 +66,21 @@ def create_dataset(dataset_name): return dataset_class(dataset_name) +class Markers(object): + """Todo: Build in cacheing so it saves us reading the same file more than once""" + def __init__(self, name): + json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) + self.markers = json.load(json_data) + + def add_pvalues(p_values): + #for count, marker in enumerate(self.markers): + # marker['p_value'] = p_values[count] + + for marker, p_value in itertools.izip(self.markers, p_values): + marker['p_value'] = p_value + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. @@ -84,6 +101,7 @@ class DatasetGroup(object): self.f1list = None self.parlist = None self.allsamples = None + self.markers = Markers(self.name) #def read_genotype(self): @@ -91,9 +109,16 @@ class DatasetGroup(object): # # if not self.genotype: # Didn'd succeed, so we try method 2 # self.read_genotype_data() - + + #def read_genotype_json(self): + # '''Read genotype from json file''' + # + # json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json')) + # markers = json.load(json_data) + # + def read_genotype_file(self): - '''read genotype from .geno file instead of database''' + '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': # self.group = 'BXD' # diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 13ec4280..1d005df4 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -454,10 +454,11 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - - json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) - markers = json.load(json_data) - genotype_data = [marker['genotypes'] for marker in markers] + + #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) + #markers = json.load(json_data) + + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers] no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) @@ -466,7 +467,6 @@ class MarkerRegression(object): #for marker_object in genotype_data: # print("marker_object:", pf(marker_object)) - #prep_data.PrepData(self.vals, genotype_data) @@ -492,40 +492,60 @@ class MarkerRegression(object): refit=False) print("p_values is:", pf(len(p_values))) + + self.dataset.group.markers.add_pvalues(p_values) #calculate QTL for each trait - self.qtl_results = self.genotype.regression(strains = self.samples, - trait = self.vals) - self.lrs_array = self.genotype.permutation(strains = self.samples, - trait = self.vals, - nperm=self.num_perm) + #self.qtl_results = self.genotype.regression(strains = self.samples, + # trait = self.vals) + #self.lrs_array = self.genotype.permutation(strains = self.samples, + # trait = self.vals, + # nperm=self.num_perm) + + self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers] self.lrs_thresholds = Bunch( - suggestive = self.lrs_array[int(self.num_perm*0.37-1)], - significant = self.lrs_array[int(self.num_perm*0.95-1)], - highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] + suggestive = self.lrs_values[int(self.num_perm*0.37-1)], + significant = self.lrs_values[int(self.num_perm*0.95-1)], + highly_significant = self.lrs_values[int(self.num_perm*0.99-1)] ) + #self.lrs_thresholds = Bunch( + # suggestive = self.lrs_array[int(self.num_perm*0.37-1)], + # significant = self.lrs_array[int(self.num_perm*0.95-1)], + # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] + # ) + if self.display_all_lrs: - filtered_results = self.qtl_results + self.filtered_results = self.dataset.group.markers.markers else: - suggestive_results = [] + self.filtered_results = [] self.pure_qtl_results = [] - for result in self.qtl_results: - self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, - mb=result.locus.Mb, - chromosome=result.locus.chr), - lrs=result.lrs, - additive=result.additive)) - if result.lrs > self.lrs_thresholds.suggestive: - suggestive_results.append(result) - filtered_results = suggestive_results + for marker in self.dataset.group.markers.markers: + self.pure_qtl_results.append(marker) + if marker['lrs_value'] > self.lrs_thresholds.suggestive: + self.filtered_results.append(marker) + + #if self.display_all_lrs: + # filtered_results = self.qtl_results + #else: + # suggestive_results = [] + # self.pure_qtl_results = [] + # for result in self.qtl_results: + # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, + # mb=result.locus.Mb, + # chromosome=result.locus.chr), + # lrs=result.lrs, + # additive=result.additive)) + # if result.lrs > self.lrs_thresholds.suggestive: + # suggestive_results.append(result) + # filtered_results = suggestive_results # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used - if not filtered_results: + if not self.filtered_results: # We use the 10 results with the highest LRS values - filtered_results = sorted(self.qtl_results)[-10:] + self.filtered_results = sorted(self.qtl_results)[-10:] self.top_10 = True else: self.top_10 = False @@ -567,11 +587,9 @@ class MarkerRegression(object): #permutation = HT.TableLite() #permutation.append(HT.TR(HT.TD(img))) - for marker in filtered_results: - if marker.lrs > webqtlConfig.MAXLRS: - marker.lrs = webqtlConfig.MAXLRS - - self.filtered_results = filtered_results + for marker in self.filtered_results: + if marker['lrs_value'] > webqtlConfig.MAXLRS: + marker['lrs_value'] = webqtlConfig.MAXLRS #if fd.genotype.type == 'intercross': # ncol =len(headerList) diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index ec8c521c..8c74fe74 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -28,10 +28,11 @@ class Marker(object): class ConvertGenoFile(object): - def __init__(self, input_file, output_file): + def __init__(self, input_file, output_file, file_type): self.input_file = input_file self.output_file = output_file + self.file_type = file_type self.mb_exists = False self.markers = [] @@ -57,7 +58,10 @@ class ConvertGenoFile(object): self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: - self.process_csv() + if self.file_type == "geno": + self.process_csv() + elif self.file_type == "snps": + self.process_snps_file() #def process_row(self, row): @@ -66,6 +70,7 @@ class ConvertGenoFile(object): # if char # counter += 1 + def process_csv(self): for row_count, row in enumerate(self.process_rows()): #self.latest_row_pos = row_count @@ -146,6 +151,12 @@ class ConvertGenoFile(object): print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break + + def process_snps_file(cls, snps_file, new_directory): + output_file = os.path.join(new_directory, "mouse_families.json") + print("%s -> %s" % (snps_file, output_file)) + convertob = ConvertGenoFile(input_file, output_file) + if __name__=="__main__": -- cgit 1.4.1 From e416b7b4fcabff05d1665ae5dbb962cfb61e471d Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 13 Feb 2013 14:26:52 -0600 Subject: Fixed some bugs related to getting the marker regression page working with Nick's code --- wqflask/base/data_set.py | 9 ++++----- wqflask/wqflask/marker_regression/marker_regression.py | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 182e15e6..d4e97370 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -22,8 +22,10 @@ from __future__ import absolute_import, print_function, division import os +import math import json +import itertools from flask import Flask, g @@ -70,12 +72,9 @@ class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" def __init__(self, name): json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) - self.markers = json.load(json_data) + self.markers = json.load(json_data_fh) - def add_pvalues(p_values): - #for count, marker in enumerate(self.markers): - # marker['p_value'] = p_values[count] - + def add_pvalues(self, p_values): for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 1d005df4..c9451154 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -458,7 +458,7 @@ class MarkerRegression(object): #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) #markers = json.load(json_data) - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers] + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) @@ -502,7 +502,9 @@ class MarkerRegression(object): # trait = self.vals, # nperm=self.num_perm) - self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers] + self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] + print("self.lrs_values is:", pf(self.lrs_values)) + print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) self.lrs_thresholds = Bunch( suggestive = self.lrs_values[int(self.num_perm*0.37-1)], -- cgit 1.4.1 From 7188d8e3fb54901a36abffa72f5bb63eed530b9c Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 13 Feb 2013 15:24:57 -0600 Subject: Got results for marker regression page displaying in table --- .../wqflask/marker_regression/marker_regression.py | 18 +++++++++++----- wqflask/wqflask/templates/marker_regression.html | 25 +++++++++++++++++++++- 2 files changed, 37 insertions(+), 6 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c9451154..c06ab7e8 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -308,7 +308,7 @@ class MarkerRegression(object): self.js_data = dict( qtl_results = self.pure_qtl_results, - lrs_array = self.lrs_array, + lrs_values = self.lrs_values, ) @@ -504,12 +504,20 @@ class MarkerRegression(object): self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] print("self.lrs_values is:", pf(self.lrs_values)) + lrs_values_sorted = sorted(self.lrs_values) + + print("lrs_values_sorted is:", pf(lrs_values_sorted)) print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) - + + lrs_values_length = len(lrs_values_sorted) + + def lrs_threshold(place): + return lrs_values_sorted[int((lrs_values_length * place) -1)] + self.lrs_thresholds = Bunch( - suggestive = self.lrs_values[int(self.num_perm*0.37-1)], - significant = self.lrs_values[int(self.num_perm*0.95-1)], - highly_significant = self.lrs_values[int(self.num_perm*0.99-1)] + suggestive = lrs_threshold(.37), + significant = lrs_threshold(.95), + highly_significant = lrs_threshold(.99), ) #self.lrs_thresholds = Bunch( diff --git a/wqflask/wqflask/templates/marker_regression.html b/wqflask/wqflask/templates/marker_regression.html index aeb05132..b5477070 100644 --- a/wqflask/wqflask/templates/marker_regression.html +++ b/wqflask/wqflask/templates/marker_regression.html @@ -18,9 +18,32 @@
+ + + + + + + + + + + + + {% for marker in filtered_results %} + + + + + + + + {% endfor %} + +
IndexLRSChrMbLocus
{{loop.index}}{{marker.lrs_value}}{{marker.chr}}{{marker.Mb}}{{marker.name}}
+
- {% endblock %} -- cgit 1.4.1 From 745bcbeabdf5794b5f3185fcd497dfce04f09b84 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 13 Feb 2013 18:07:27 -0600 Subject: Got results displaying for BXD and fixed genofile_parser.py to work for all of our genofiles --- .../wqflask/marker_regression/marker_regression.py | 26 +- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 31 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 48 +- .../new/packages/DataTables/css/demo_page.css | 17 +- .../new/packages/DataTables/css/demo_table.css | 3 +- .../new/packages/DataTables/css/demo_table_jui.css | 25 - .../packages/DataTables/css/jquery.dataTables.css | 3 +- .../css/jquery.dataTables_themeroller.css | 1 - .../packages/DataTables/js/jquery.dataTables.js | 1412 ++++++++++++-------- .../DataTables/js/jquery.dataTables.min.js | 277 ++-- .../static/new/packages/DataTables/js/jquery.js | 6 +- .../new/packages/DataTables/src/DataTables.js | 37 +- .../packages/DataTables/src/api/api.internal.js | 6 +- .../new/packages/DataTables/src/api/api.methods.js | 72 +- .../new/packages/DataTables/src/core/core.ajax.js | 6 +- .../packages/DataTables/src/core/core.columns.js | 108 +- .../DataTables/src/core/core.constructor.js | 49 +- .../new/packages/DataTables/src/core/core.data.js | 174 ++- .../new/packages/DataTables/src/core/core.draw.js | 40 +- .../packages/DataTables/src/core/core.filter.js | 56 +- .../new/packages/DataTables/src/core/core.info.js | 24 +- .../packages/DataTables/src/core/core.length.js | 2 +- .../new/packages/DataTables/src/core/core.page.js | 2 +- .../packages/DataTables/src/core/core.scrolling.js | 118 +- .../packages/DataTables/src/core/core.sizing.js | 12 +- .../new/packages/DataTables/src/core/core.sort.js | 87 +- .../new/packages/DataTables/src/core/core.state.js | 51 +- .../packages/DataTables/src/core/core.support.js | 54 +- .../new/packages/DataTables/src/ext/ext.paging.js | 31 +- .../packages/DataTables/src/model/model.column.js | 47 +- .../DataTables/src/model/model.defaults.columns.js | 276 ++-- .../DataTables/src/model/model.defaults.js | 136 +- .../new/packages/DataTables/src/model/model.ext.js | 8 +- .../new/packages/DataTables/src/model/model.row.js | 2 +- .../DataTables/src/model/model.settings.js | 16 +- .../tests_onhold/5_ajax_objects/_zero_config.js | 10 +- .../_zero_config_arrays_subobjects.js | 6 +- .../5_ajax_objects/_zero_config_deep.js | 10 +- .../5_ajax_objects/_zero_config_null_source.js | 2 +- .../5_ajax_objects/_zero_config_objects.js | 10 +- .../_zero_config_objects_subarrays.js | 10 +- .../tests_onhold/5_ajax_objects/aaSorting.js | 140 +- .../tests_onhold/5_ajax_objects/aaSortingFixed.js | 30 +- .../5_ajax_objects/aoColumns.bSearchable.js | 30 +- .../5_ajax_objects/aoColumns.bSortable.js | 30 +- .../5_ajax_objects/aoColumns.bUseRendered.js | 30 +- .../5_ajax_objects/aoColumns.bVisible.js | 30 +- .../5_ajax_objects/aoColumns.fnRender.js | 62 +- .../5_ajax_objects/aoColumns.iDataSort.js | 20 +- .../5_ajax_objects/aoColumns.sClass.js | 30 +- .../tests_onhold/5_ajax_objects/aoColumns.sName.js | 10 +- .../5_ajax_objects/aoColumns.sTitle.js | 30 +- .../5_ajax_objects/aoColumns.sWidth.js | 30 +- .../tests_onhold/5_ajax_objects/aoSearchCols.js | 60 +- .../tests_onhold/5_ajax_objects/asStripClasses.js | 40 +- .../tests_onhold/5_ajax_objects/bAutoWidth.js | 30 +- .../tests_onhold/5_ajax_objects/bFilter.js | 30 +- .../tests_onhold/5_ajax_objects/bInfo.js | 30 +- .../tests_onhold/5_ajax_objects/bLengthChange.js | 30 +- .../tests_onhold/5_ajax_objects/bPaginate.js | 30 +- .../tests_onhold/5_ajax_objects/bProcessing.js | 30 +- .../tests_onhold/5_ajax_objects/bServerSide.js | 10 +- .../tests_onhold/5_ajax_objects/bSort.js | 30 +- .../tests_onhold/5_ajax_objects/bSortClasses.js | 30 +- .../tests_onhold/5_ajax_objects/fnDrawCallback.js | 40 +- .../5_ajax_objects/fnHeaderCallback.js | 90 +- .../tests_onhold/5_ajax_objects/fnInitComplete.js | 50 +- .../tests_onhold/5_ajax_objects/fnRowCallback.js | 60 +- .../tests_onhold/5_ajax_objects/fnServerData.js | 40 +- .../tests_onhold/5_ajax_objects/iDisplayLength.js | 40 +- .../5_ajax_objects/oLanguage.oPaginate.js | 20 +- .../tests_onhold/5_ajax_objects/oLanguage.sInfo.js | 70 +- .../5_ajax_objects/oLanguage.sInfoEmpty.js | 30 +- .../5_ajax_objects/oLanguage.sInfoPostFix.js | 40 +- .../5_ajax_objects/oLanguage.sLengthMenu.js | 40 +- .../5_ajax_objects/oLanguage.sProcessing.js | 20 +- .../5_ajax_objects/oLanguage.sSearch.js | 30 +- .../tests_onhold/5_ajax_objects/oLanguage.sUrl.js | 20 +- .../5_ajax_objects/oLanguage.sZeroRecords.js | 20 +- .../tests_onhold/5_ajax_objects/oSearch.js | 60 +- .../tests_onhold/5_ajax_objects/sAjaxSource.js | 10 +- .../tests_onhold/5_ajax_objects/sDom.js | 70 +- .../tests_onhold/5_ajax_objects/sPaginationType.js | 20 +- .../static/packages/DT_bootstrap/DT_bootstrap.css | 179 +++ .../static/packages/DT_bootstrap/DT_bootstrap.js | 159 +++ .../wqflask/static/packages/DT_bootstrap/images | 1 + wqflask/wqflask/templates/marker_regression.html | 26 +- 87 files changed, 3058 insertions(+), 2210 deletions(-) create mode 100644 wqflask/wqflask/static/packages/DT_bootstrap/DT_bootstrap.css create mode 100644 wqflask/wqflask/static/packages/DT_bootstrap/DT_bootstrap.js create mode 120000 wqflask/wqflask/static/packages/DT_bootstrap/images (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c06ab7e8..81bf3825 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -462,6 +462,11 @@ class MarkerRegression(object): no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + #for i, marker in enumerate(trimmed_genotype_data): + # if i > 10: + # break + # print("genotype is:", pf(marker)) #print("trimmed genotype data is:", pf(trimmed_genotype_data)) @@ -471,8 +476,17 @@ class MarkerRegression(object): #prep_data.PrepData(self.vals, genotype_data) pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) + print("genotypes was:", pf(trimmed_genotype_data)) + for item in trimmed_genotype_data: + if type(item) != type(list()): + print(" --->", type(item)) + for counter, part in enumerate(item): + if type(part) != type(float()): + print(" ------>", type(part), " : ", part) + if counter % 100 == 0: + print(" ------>", type(part)) genotypes = np.array(trimmed_genotype_data).T - print("genotypes is", pf(genotypes)) + print("genotypes is:", pf(genotypes)) #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, # self.dataset.group.name + '.snps.new')).T @@ -491,7 +505,7 @@ class MarkerRegression(object): REML=True, refit=False) - print("p_values is:", pf(len(p_values))) + #print("p_values is:", pf(len(p_values))) self.dataset.group.markers.add_pvalues(p_values) @@ -503,11 +517,11 @@ class MarkerRegression(object): # nperm=self.num_perm) self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] - print("self.lrs_values is:", pf(self.lrs_values)) + #print("self.lrs_values is:", pf(self.lrs_values)) lrs_values_sorted = sorted(self.lrs_values) - print("lrs_values_sorted is:", pf(lrs_values_sorted)) - print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) + #print("lrs_values_sorted is:", pf(lrs_values_sorted)) + #print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) lrs_values_length = len(lrs_values_sorted) @@ -705,6 +719,8 @@ class MarkerRegression(object): try: genotype = float(genotype) except ValueError: + genotype = np.nan + print("Couldn't convert to float:", genotype) pass new_genotypes.append(genotype) trimmed_genotype_data.append(new_genotypes) diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index 8c74fe74..b926592b 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -28,11 +28,10 @@ class Marker(object): class ConvertGenoFile(object): - def __init__(self, input_file, output_file, file_type): + def __init__(self, input_file, output_file): self.input_file = input_file self.output_file = output_file - self.file_type = file_type self.mb_exists = False self.markers = [] @@ -58,10 +57,10 @@ class ConvertGenoFile(object): self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: - if self.file_type == "geno": - self.process_csv() - elif self.file_type == "snps": - self.process_snps_file() + #if self.file_type == "geno": + self.process_csv() + #elif self.file_type == "snps": + # self.process_snps_file() #def process_row(self, row): @@ -87,7 +86,10 @@ class ConvertGenoFile(object): else: genotypes = row_items[3:] for item_count, genotype in enumerate(genotypes): - this_marker.genotypes.append(self.configurations[genotype.upper()]) + if genotype.upper() in self.configurations: + this_marker.genotypes.append(self.configurations[genotype.upper()]) + else: + this_marker.genotypes.append("NA") #print("this_marker is:", pf(this_marker.__dict__)) @@ -111,6 +113,8 @@ class ConvertGenoFile(object): for self.latest_row_pos, row in enumerate(self.input_fh): self.latest_row_value = row # Take care of headers + if not row.strip(): + continue if row.startswith('#'): continue if row.startswith('Chr'): @@ -134,7 +138,8 @@ class ConvertGenoFile(object): for input_file in glob.glob("*.geno"): group_name = input_file.split('.')[0] output_file = os.path.join(new_directory, group_name + ".json") - print("%s -> %s" % (input_file, output_file)) + print("%s -> %s" % ( + os.path.join(old_directory, input_file), output_file)) convertob = ConvertGenoFile(input_file, output_file) try: convertob.convert() @@ -146,16 +151,16 @@ class ConvertGenoFile(object): print(" Exception:", why) print(traceback.print_exc()) - print(" Found in row %i at tabular column %i" % (convertob.latest_row_pos, + print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - def process_snps_file(cls, snps_file, new_directory): - output_file = os.path.join(new_directory, "mouse_families.json") - print("%s -> %s" % (snps_file, output_file)) - convertob = ConvertGenoFile(input_file, output_file) + #def process_snps_file(cls, snps_file, new_directory): + # output_file = os.path.join(new_directory, "mouse_families.json") + # print("%s -> %s" % (snps_file, output_file)) + # convertob = ConvertGenoFile(input_file, output_file) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 015c2e14..d0f379dd 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -54,27 +54,33 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) def calculateKinship(W): - """ - W is an n x m matrix encoding SNP minor alleles. - - This function takes a matrix oF SNPs, imputes missing values with the maf, - normalizes the resulting vectors and returns the RRM matrix. - """ - n = W.shape[0] - m = W.shape[1] - keep = [] - for i in range(m): - mn = W[True - np.isnan(W[:,i]),i].mean() - W[np.isnan(W[:,i]),i] = mn - vr = W[:,i].var() - if vr == 0: continue - - keep.append(i) - W[:,i] = (W[:,i] - mn) / np.sqrt(vr) - - W = W[:,keep] - K = matrixMult(W,W.T) * 1.0/float(m) - return K + """ + W is an n x m matrix encoding SNP minor alleles. + + This function takes a matrix oF SNPs, imputes missing values with the maf, + normalizes the resulting vectors and returns the RRM matrix. + """ + n = W.shape[0] + m = W.shape[1] + print("n is:", n) + print("m is:", m) + keep = [] + for i in range(m): + print("type of W[:,i]:", pf(W[:,i])) + foo = np.isnan(W[:,i]) + print("type of foo:", type(foo)) + mn = W[True - foo,i] + print("type of mn is:", type(mn)) + mn = mn.mean() + W[np.isnan(W[:,i]),i] = mn + vr = W[:,i].var() + if vr == 0: + continue + keep.append(i) + W[:,i] = (W[:,i] - mn) / np.sqrt(vr) + W = W[:,keep] + K = np.dot(W,W.T) * 1.0/float(m) + return K def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): """ diff --git a/wqflask/wqflask/static/new/packages/DataTables/css/demo_page.css b/wqflask/wqflask/static/new/packages/DataTables/css/demo_page.css index 89c62bb7..ba5b2a6c 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/css/demo_page.css +++ b/wqflask/wqflask/static/new/packages/DataTables/css/demo_page.css @@ -104,4 +104,19 @@ height: 100px; width: 100%; overflow: auto; -} \ No newline at end of file +} + +#dt_example code { + font-family: Menlo, Monaco, Consolas, "Courier New", monospace; + padding: 2px 4px !important; + white-space: nowrap; + font-size: 0.9em; + + color: #D14; + background-color: #F7F7F9; + + border: 1px solid #E1E1E8; + -webkit-border-radius: 3px; + -moz-border-radius: 3px; + border-radius: 3px; +} diff --git a/wqflask/wqflask/static/new/packages/DataTables/css/demo_table.css b/wqflask/wqflask/static/new/packages/DataTables/css/demo_table.css index f41a0042..12f352da 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/css/demo_table.css +++ b/wqflask/wqflask/static/new/packages/DataTables/css/demo_table.css @@ -201,7 +201,8 @@ table.display td.center { background: url('../images/sort_desc_disabled.png') no-repeat center right; } -th:active { +table.display thead th:active, +table.display thead td:active { outline: none; } diff --git a/wqflask/wqflask/static/new/packages/DataTables/css/demo_table_jui.css b/wqflask/wqflask/static/new/packages/DataTables/css/demo_table_jui.css index de7c8426..a210af51 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/css/demo_table_jui.css +++ b/wqflask/wqflask/static/new/packages/DataTables/css/demo_table_jui.css @@ -82,7 +82,6 @@ div.dataTables_wrapper .ui-widget-header { table.display thead th div.DataTables_sort_wrapper { position: relative; padding-right: 20px; - padding-right: 20px; } table.display thead th div.DataTables_sort_wrapper span { @@ -147,30 +146,6 @@ table.display thead th div.DataTables_sort_wrapper span { text-align: right; } -/* Pagination nested */ -.paginate_disabled_previous, .paginate_enabled_previous, .paginate_disabled_next, .paginate_enabled_next { - height: 19px; - width: 19px; - margin-left: 3px; - float: left; -} - -.paginate_disabled_previous { - background-image: url('../images/back_disabled.jpg'); -} - -.paginate_enabled_previous { - background-image: url('../images/back_enabled.jpg'); -} - -.paginate_disabled_next { - background-image: url('../images/forward_disabled.jpg'); -} - -.paginate_enabled_next { - background-image: url('../images/forward_enabled.jpg'); -} - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * diff --git a/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css b/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css index 83df98ea..7da7faec 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css +++ b/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css @@ -201,7 +201,8 @@ table.dataTable tr.even td.sorting_3 { background-color: #F9F9FF; } .sorting_asc_disabled { background: url('../images/sort_asc_disabled.png') no-repeat center right; } .sorting_desc_disabled { background: url('../images/sort_desc_disabled.png') no-repeat center right; } -table.dataTable th:active { +table.dataTable thead th:active, +table.dataTable thead td:active { outline: none; } diff --git a/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables_themeroller.css b/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables_themeroller.css index 55661c6d..cf1d4ed7 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables_themeroller.css +++ b/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables_themeroller.css @@ -216,7 +216,6 @@ table.dataTable tr.even td.sorting_3 { background-color: #F9F9FF; } table.dataTable thead th div.DataTables_sort_wrapper { position: relative; padding-right: 20px; - padding-right: 20px; } table.dataTable thead th div.DataTables_sort_wrapper span { diff --git a/wqflask/wqflask/static/new/packages/DataTables/js/jquery.dataTables.js b/wqflask/wqflask/static/new/packages/DataTables/js/jquery.dataTables.js index ae5d1750..1d8a220b 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/js/jquery.dataTables.js +++ b/wqflask/wqflask/static/new/packages/DataTables/js/jquery.dataTables.js @@ -1,7 +1,7 @@ /** * @summary DataTables * @description Paginate, search and sort HTML tables - * @version 1.9.2 + * @version 1.9.4 * @file jquery.dataTables.js * @author Allan Jardine (www.sprymedia.co.uk) * @contact www.sprymedia.co.uk/contact @@ -21,9 +21,28 @@ */ /*jslint evil: true, undef: true, browser: true */ -/*globals $, jQuery,_fnExternApiFunc,_fnInitialise,_fnInitComplete,_fnLanguageCompat,_fnAddColumn,_fnColumnOptions,_fnAddData,_fnCreateTr,_fnGatherData,_fnBuildHead,_fnDrawHead,_fnDraw,_fnReDraw,_fnAjaxUpdate,_fnAjaxParameters,_fnAjaxUpdateDraw,_fnServerParams,_fnAddOptionsHtml,_fnFeatureHtmlTable,_fnScrollDraw,_fnAdjustColumnSizing,_fnFeatureHtmlFilter,_fnFilterComplete,_fnFilterCustom,_fnFilterColumn,_fnFilter,_fnBuildSearchArray,_fnBuildSearchRow,_fnFilterCreateSearch,_fnDataToSearch,_fnSort,_fnSortAttachListener,_fnSortingClasses,_fnFeatureHtmlPaginate,_fnPageChange,_fnFeatureHtmlInfo,_fnUpdateInfo,_fnFeatureHtmlLength,_fnFeatureHtmlProcessing,_fnProcessingDisplay,_fnVisibleToColumnIndex,_fnColumnIndexToVisible,_fnNodeToDataIndex,_fnVisbleColumns,_fnCalculateEnd,_fnConvertToWidth,_fnCalculateColumnWidths,_fnScrollingWidthAdjust,_fnGetWidestNode,_fnGetMaxLenString,_fnStringToCss,_fnDetectType,_fnSettingsFromNode,_fnGetDataMaster,_fnGetTrNodes,_fnGetTdNodes,_fnEscapeRegex,_fnDeleteIndex,_fnReOrderIndex,_fnColumnOrdering,_fnLog,_fnClearTable,_fnSaveState,_fnLoadState,_fnCreateCookie,_fnReadCookie,_fnDetectHeader,_fnGetUniqueThs,_fnScrollBarWidth,_fnApplyToChildren,_fnMap,_fnGetRowData,_fnGetCellData,_fnSetCellData,_fnGetObjectDataFn,_fnSetObjectDataFn,_fnApplyColumnDefs,_fnBindAction,_fnCallbackReg,_fnCallbackFire,_fnJsonString,_fnRender,_fnNodeToColumnIndex,_fnInfoMacros*/ +/*globals $, jQuery,define,_fnExternApiFunc,_fnInitialise,_fnInitComplete,_fnLanguageCompat,_fnAddColumn,_fnColumnOptions,_fnAddData,_fnCreateTr,_fnGatherData,_fnBuildHead,_fnDrawHead,_fnDraw,_fnReDraw,_fnAjaxUpdate,_fnAjaxParameters,_fnAjaxUpdateDraw,_fnServerParams,_fnAddOptionsHtml,_fnFeatureHtmlTable,_fnScrollDraw,_fnAdjustColumnSizing,_fnFeatureHtmlFilter,_fnFilterComplete,_fnFilterCustom,_fnFilterColumn,_fnFilter,_fnBuildSearchArray,_fnBuildSearchRow,_fnFilterCreateSearch,_fnDataToSearch,_fnSort,_fnSortAttachListener,_fnSortingClasses,_fnFeatureHtmlPaginate,_fnPageChange,_fnFeatureHtmlInfo,_fnUpdateInfo,_fnFeatureHtmlLength,_fnFeatureHtmlProcessing,_fnProcessingDisplay,_fnVisibleToColumnIndex,_fnColumnIndexToVisible,_fnNodeToDataIndex,_fnVisbleColumns,_fnCalculateEnd,_fnConvertToWidth,_fnCalculateColumnWidths,_fnScrollingWidthAdjust,_fnGetWidestNode,_fnGetMaxLenString,_fnStringToCss,_fnDetectType,_fnSettingsFromNode,_fnGetDataMaster,_fnGetTrNodes,_fnGetTdNodes,_fnEscapeRegex,_fnDeleteIndex,_fnReOrderIndex,_fnColumnOrdering,_fnLog,_fnClearTable,_fnSaveState,_fnLoadState,_fnCreateCookie,_fnReadCookie,_fnDetectHeader,_fnGetUniqueThs,_fnScrollBarWidth,_fnApplyToChildren,_fnMap,_fnGetRowData,_fnGetCellData,_fnSetCellData,_fnGetObjectDataFn,_fnSetObjectDataFn,_fnApplyColumnDefs,_fnBindAction,_fnCallbackReg,_fnCallbackFire,_fnJsonString,_fnRender,_fnNodeToColumnIndex,_fnInfoMacros,_fnBrowserDetect,_fnGetColumns*/ -(/** @lends */function($, window, document, undefined) { +(/** @lends */function( window, document, undefined ) { + +(function( factory ) { + "use strict"; + + // Define as an AMD module if possible + if ( typeof define === 'function' && define.amd ) + { + define( ['jquery'], factory ); + } + /* Define using browser globals otherwise + * Prevent multiple instantiations if the script is loaded twice + */ + else if ( jQuery && !jQuery.fn.dataTable ) + { + factory( jQuery ); + } +} +(/** @lends */function( $ ) { + "use strict"; /** * DataTables is a plug-in for the jQuery Javascript library. It is a * highly flexible tool, based upon the foundations of progressive @@ -76,7 +95,7 @@ "nTh": nTh ? nTh : document.createElement('th'), "sTitle": oDefaults.sTitle ? oDefaults.sTitle : nTh ? nTh.innerHTML : '', "aDataSort": oDefaults.aDataSort ? oDefaults.aDataSort : [iCol], - "mDataProp": oDefaults.mDataProp ? oDefaults.oDefaults : iCol + "mData": oDefaults.mData ? oDefaults.oDefaults : iCol } ); oSettings.aoColumns.push( oCol ); @@ -115,7 +134,7 @@ * Apply options for a column * @param {object} oSettings dataTables settings object * @param {int} iCol column index to consider - * @param {object} oOptions object with sType, bVisible and bSearchable + * @param {object} oOptions object with sType, bVisible and bSearchable etc * @memberof DataTable#oApi */ function _fnColumnOptions( oSettings, iCol, oOptions ) @@ -125,6 +144,12 @@ /* User specified column options */ if ( oOptions !== undefined && oOptions !== null ) { + /* Backwards compatibility for mDataProp */ + if ( oOptions.mDataProp && !oOptions.mData ) + { + oOptions.mData = oOptions.mDataProp; + } + if ( oOptions.sType !== undefined ) { oCol.sType = oOptions.sType; @@ -145,8 +170,19 @@ } /* Cache the data get and set functions for speed */ - oCol.fnGetData = _fnGetObjectDataFn( oCol.mDataProp ); - oCol.fnSetData = _fnSetObjectDataFn( oCol.mDataProp ); + var mRender = oCol.mRender ? _fnGetObjectDataFn( oCol.mRender ) : null; + var mData = _fnGetObjectDataFn( oCol.mData ); + + oCol.fnGetData = function (oData, sSpecific) { + var innerData = mData( oData, sSpecific ); + + if ( oCol.mRender && (sSpecific && sSpecific !== '') ) + { + return mRender( innerData, sSpecific, oData ); + } + return innerData; + }; + oCol.fnSetData = _fnSetObjectDataFn( oCol.mData ); /* Feature sorting overrides column specific when off */ if ( !oSettings.oFeatures.bSort ) @@ -161,11 +197,10 @@ oCol.sSortingClass = oSettings.oClasses.sSortableNone; oCol.sSortingClassJUI = ""; } - else if ( oCol.bSortable || - ($.inArray('asc', oCol.asSorting) == -1 && $.inArray('desc', oCol.asSorting) == -1) ) + else if ( $.inArray('asc', oCol.asSorting) == -1 && $.inArray('desc', oCol.asSorting) == -1 ) { - oCol.sSortingClass = oSettings.oClasses.sSortable; - oCol.sSortingClassJUI = oSettings.oClasses.sSortJUI; + oCol.sSortingClass = oSettings.oClasses.sSortable; + oCol.sSortingClassJUI = oSettings.oClasses.sSortJUI; } else if ( $.inArray('asc', oCol.asSorting) != -1 && $.inArray('desc', oCol.asSorting) == -1 ) { @@ -188,7 +223,7 @@ */ function _fnAdjustColumnSizing ( oSettings ) { - /* Not interested in doing column width calculation if autowidth is disabled */ + /* Not interested in doing column width calculation if auto-width is disabled */ if ( oSettings.oFeatures.bAutoWidth === false ) { return false; @@ -212,22 +247,11 @@ */ function _fnVisibleToColumnIndex( oSettings, iMatch ) { - var iColumn = -1; - - for ( var i=0 ; i tag - remove it */ - sSearch = sSearch.replace(/\n/g," ").replace(/\r/g,""); + sSearch = $('
').html(sSearch).text(); } - return sSearch; + // Strip newline characters + return sSearch.replace( /[\n\r]/g, " " ); } /** @@ -2280,7 +2393,7 @@ * @param {string} sSearch string to search for * @param {bool} bRegex treat as a regular expression or not * @param {bool} bSmart perform smart filtering or not - * @param {bool} bCaseInsensitive Do case insenstive matching or not + * @param {bool} bCaseInsensitive Do case insensitive matching or not * @returns {RegExp} constructed object * @memberof DataTable#oApi */ @@ -2335,7 +2448,7 @@ /** - * scape a string stuch that it can be used in a regular expression + * scape a string such that it can be used in a regular expression * @param {string} sVal string to escape * @returns {string} escaped string * @memberof DataTable#oApi @@ -2348,7 +2461,6 @@ } - /** * Generate the node required for the info display * @param {object} oSettings dataTables settings object @@ -2399,25 +2511,20 @@ iTotal = oSettings.fnRecordsDisplay(), sOut; - if ( iTotal === 0 && iTotal == iMax ) + if ( iTotal === 0 ) { /* Empty record set */ sOut = oLang.sInfoEmpty; } - else if ( iTotal === 0 ) - { - /* Empty record set after filtering */ - sOut = oLang.sInfoEmpty +' '+ oLang.sInfoFiltered; - } - else if ( iTotal == iMax ) - { + else { /* Normal record set */ sOut = oLang.sInfo; } - else + + if ( iTotal != iMax ) { /* Record set after filtering */ - sOut = oLang.sInfo +' '+ oLang.sInfoFiltered; + sOut += ' ' + oLang.sInfoFiltered; } // Convert the macros @@ -2458,10 +2565,10 @@ } return str. - replace('_START_', sStart). - replace('_END_', sEnd). - replace('_TOTAL_', sTotal). - replace('_MAX_', sMax); + replace(/_START_/g, sStart). + replace(/_END_/g, sEnd). + replace(/_TOTAL_/g, sTotal). + replace(/_MAX_/g, sMax); } @@ -2712,7 +2819,7 @@ /** - * Rcalculate the end point based on the start point + * Recalculate the end point based on the start point * @param {object} oSettings dataTables settings object * @memberof DataTable#oApi */ @@ -2816,7 +2923,7 @@ oSettings._iDisplayStart - oSettings._iDisplayLength : 0; - /* Correct for underrun */ + /* Correct for under-run */ if ( oSettings._iDisplayStart < 0 ) { oSettings._iDisplayStart = 0; @@ -2902,8 +3009,6 @@ $(oSettings.oInstance).trigger('processing', [oSettings, bShow]); } - - /** * Add any control elements for the table - specifically scrolling * @param {object} oSettings dataTables settings object @@ -3012,7 +3117,7 @@ /* * Sizing */ - /* When xscrolling add the width and a scroller to move the header with the body */ + /* When x-scrolling add the width and a scroller to move the header with the body */ if ( oSettings.oScroll.sX !== "" ) { nScrollHead.style.width = _fnStringToCss( oSettings.oScroll.sX ); @@ -3095,10 +3200,18 @@ nScrollBody = o.nTable.parentNode, i, iLen, j, jLen, anHeadToSize, anHeadSizers, anFootSizers, anFootToSize, oStyle, iVis, nTheadSize, nTfootSize, - iWidth, aApplied=[], iSanityWidth, + iWidth, aApplied=[], aAppliedFooter=[], iSanityWidth, nScrollFootInner = (o.nTFoot !== null) ? o.nScrollFoot.getElementsByTagName('div')[0] : null, nScrollFootTable = (o.nTFoot !== null) ? nScrollFootInner.getElementsByTagName('table')[0] : null, - ie67 = $.browser.msie && $.browser.version <= 7; + ie67 = o.oBrowser.bScrollOversize, + zeroOut = function(nSizer) { + oStyle = nSizer.style; + oStyle.paddingTop = "0"; + oStyle.paddingBottom = "0"; + oStyle.borderTopWidth = "0"; + oStyle.borderBottomWidth = "0"; + oStyle.height = 0; + }; /* * 1. Re-create the table inside the scrolling div @@ -3110,11 +3223,15 @@ /* Clone the current header and footer elements and then place it into the inner table */ nTheadSize = $(o.nTHead).clone()[0]; o.nTable.insertBefore( nTheadSize, o.nTable.childNodes[0] ); + anHeadToSize = o.nTHead.getElementsByTagName('tr'); + anHeadSizers = nTheadSize.getElementsByTagName('tr'); if ( o.nTFoot !== null ) { nTfootSize = $(o.nTFoot).clone()[0]; o.nTable.insertBefore( nTfootSize, o.nTable.childNodes[1] ); + anFootToSize = o.nTFoot.getElementsByTagName('tr'); + anFootSizers = nTfootSize.getElementsByTagName('tr'); } /* @@ -3123,7 +3240,7 @@ /* Remove old sizing and apply the calculated column widths * Get the unique column headers in the newly created (cloned) header. We want to apply the - * calclated sizes to this header + * calculated sizes to this header */ if ( o.oScroll.sX === "" ) { @@ -3142,7 +3259,7 @@ { _fnApplyToChildren( function(n) { n.style.width = ""; - }, nTfootSize.getElementsByTagName('tr') ); + }, anFootSizers ); } // If scroll collapse is enabled, when we put the headers back into the body for sizing, we @@ -3204,41 +3321,38 @@ /* We want the hidden header to have zero height, so remove padding and borders. Then * set the width based on the real headers */ - anHeadToSize = o.nTHead.getElementsByTagName('tr'); - anHeadSizers = nTheadSize.getElementsByTagName('tr'); - _fnApplyToChildren( function(nSizer, nToSize) { - oStyle = nSizer.style; - oStyle.paddingTop = "0"; - oStyle.paddingBottom = "0"; - oStyle.borderTopWidth = "0"; - oStyle.borderBottomWidth = "0"; - oStyle.height = 0; - - iWidth = $(nSizer).width(); - nToSize.style.width = _fnStringToCss( iWidth ); - aApplied.push( iWidth ); - }, anHeadSizers, anHeadToSize ); + // Apply all styles in one pass. Invalidates layout only once because we don't read any + // DOM properties. + _fnApplyToChildren( zeroOut, anHeadSizers ); + + // Read all widths in next pass. Forces layout only once because we do not change + // any DOM properties. + _fnApplyToChildren( function(nSizer) { + aApplied.push( _fnStringToCss( $(nSizer).width() ) ); + }, anHeadSizers ); + + // Apply all widths in final pass. Invalidates layout only once because we do not + // read any DOM properties. + _fnApplyToChildren( function(nToSize, i) { + nToSize.style.width = aApplied[i]; + }, anHeadToSize ); + $(anHeadSizers).height(0); + /* Same again with the footer if we have one */ if ( o.nTFoot !== null ) { - /* Clone the current footer and then place it into the body table as a "hidden header" */ - anFootSizers = nTfootSize.getElementsByTagName('tr'); - anFootToSize = o.nTFoot.getElementsByTagName('tr'); - - _fnApplyToChildren( function(nSizer, nToSize) { - oStyle = nSizer.style; - oStyle.paddingTop = "0"; - oStyle.paddingBottom = "0"; - oStyle.borderTopWidth = "0"; - oStyle.borderBottomWidth = "0"; - oStyle.height = 0; - - iWidth = $(nSizer).width(); - nToSize.style.width = _fnStringToCss( iWidth ); - aApplied.push( iWidth ); - }, anFootSizers, anFootToSize ); + _fnApplyToChildren( zeroOut, anFootSizers ); + + _fnApplyToChildren( function(nSizer) { + aAppliedFooter.push( _fnStringToCss( $(nSizer).width() ) ); + }, anFootSizers ); + + _fnApplyToChildren( function(nToSize, i) { + nToSize.style.width = aAppliedFooter[i]; + }, anFootToSize ); + $(anFootSizers).height(0); } @@ -3249,16 +3363,16 @@ /* "Hide" the header and footer that we used for the sizing. We want to also fix their width * to what they currently are */ - _fnApplyToChildren( function(nSizer) { + _fnApplyToChildren( function(nSizer, i) { nSizer.innerHTML = ""; - nSizer.style.width = _fnStringToCss( aApplied.shift() ); + nSizer.style.width = aApplied[i]; }, anHeadSizers ); if ( o.nTFoot !== null ) { - _fnApplyToChildren( function(nSizer) { + _fnApplyToChildren( function(nSizer, i) { nSizer.innerHTML = ""; - nSizer.style.width = _fnStringToCss( aApplied.shift() ); + nSizer.style.width = aAppliedFooter[i]; }, anFootSizers ); } @@ -3281,11 +3395,11 @@ /* Apply the calculated minimum width to the table wrappers */ nScrollBody.style.width = _fnStringToCss( iCorrection ); - nScrollHeadInner.parentNode.style.width = _fnStringToCss( iCorrection ); + o.nScrollHead.style.width = _fnStringToCss( iCorrection ); if ( o.nTFoot !== null ) { - nScrollFootInner.parentNode.style.width = _fnStringToCss( iCorrection ); + o.nScrollFoot.style.width = _fnStringToCss( iCorrection ); } /* And give the user a warning that we've stopped the table getting too small */ @@ -3304,11 +3418,11 @@ else { nScrollBody.style.width = _fnStringToCss( '100%' ); - nScrollHeadInner.parentNode.style.width = _fnStringToCss( '100%' ); + o.nScrollHead.style.width = _fnStringToCss( '100%' ); if ( o.nTFoot !== null ) { - nScrollFootInner.parentNode.style.width = _fnStringToCss( '100%' ); + o.nScrollFoot.style.width = _fnStringToCss( '100%' ); } } @@ -3357,7 +3471,7 @@ nScrollFootInner.style.paddingRight = bScrolling ? o.oScroll.iBarWidth+"px" : "0px"; } - /* Adjust the position of the header incase we loose the y-scrollbar */ + /* Adjust the position of the header in case we loose the y-scrollbar */ $(nScrollBody).scroll(); /* If sorting or filtering has occurred, jump the scrolling back to the top */ @@ -3378,27 +3492,34 @@ */ function _fnApplyToChildren( fn, an1, an2 ) { - for ( var i=0, iLen=an1.length ; i= iColumns ) + for (i = 0, iClass = 1; i < aaSort.length; i++) { - for ( i=0 ; i 0 && sCurrentClass.indexOf(sNewClass) == -1 ) { - iClass++; + /* We need to add a class */ + nTds[i].className = sCurrentClass + " " + sNewClass; } } } @@ -4344,7 +4456,7 @@ $.extend( true, oSettings.aoPreSearchCols, oData.aoSearchCols ); /* Column visibility state - * Pass back visibiliy settings to the init handler, but to do not here override + * Pass back visibility settings to the init handler, but to do not here override * the init object that the user might have passed in */ oInit.saved_aoColumns = []; @@ -4396,35 +4508,50 @@ } /* Are we going to go over the cookie limit of 4KiB? If so, try to delete a cookies - * belonging to DataTables. This is FAR from bullet proof + * belonging to DataTables. */ - var sOldName="", iOldTime=9999999999999; - var iLength = _fnReadCookie( sNameFile )!==null ? document.cookie.length : - sFullCookie.length + document.cookie.length; + var + aCookies =document.cookie.split(';'), + iNewCookieLen = sFullCookie.split(';')[0].length, + aOldCookies = []; - if ( iLength+10 > 4096 ) /* Magic 10 for padding */ + if ( iNewCookieLen+document.cookie.length+10 > 4096 ) /* Magic 10 for padding */ { - var aCookies =document.cookie.split(';'); for ( var i=0, iLen=aCookies.length ; i'+ + '
'+ + '
'+ + '
'+ + '
')[0]; + + document.body.appendChild( n ); + oSettings.oBrowser.bScrollOversize = $('#DT_BrowserTest', n)[0].offsetWidth === 100 ? true : false; + document.body.removeChild( n ); + } + /** * Perform a jQuery selector action on the table's TR elements (from the tbody) and * return the resulting jQuery object. @@ -4913,11 +5068,11 @@ /** * Almost identical to $ in operation, but in this case returns the data for the matched * rows - as such, the jQuery selector used should match TR row nodes or TD/TH cell nodes - * rather than any decendents, so the data can be obtained for the row/cell. If matching + * rather than any descendants, so the data can be obtained for the row/cell. If matching * rows are found, the data returned is the original data array/object that was used to * create the row (or a generated array if from a DOM source). * - * This method is often useful incombination with $ where both functions are given the + * This method is often useful in-combination with $ where both functions are given the * same parameters and the array indexes will match identically. * @param {string|node|jQuery} sSelector jQuery selector or node collection to act on * @param {object} [oOpts] Optional parameters for modifying the rows to be included @@ -4981,8 +5136,8 @@ *
    *
  • 1D array of data - add a single row with the data provided
  • *
  • 2D array of arrays - add multiple rows in a single call
  • - *
  • object - data object when using mDataProp
  • - *
  • array of objects - multiple data objects when using mDataProp
  • + *
  • object - data object when using mData
  • + *
  • array of objects - multiple data objects when using mData
  • *
* @param {bool} [bRedraw=true] redraw the table or not * @returns {array} An array of integers, representing the list of indexes in @@ -5256,20 +5411,23 @@ var nBody = oSettings.nTBody; var i, iLen; - bRemove = (bRemove===undefined) ? false : true; + bRemove = (bRemove===undefined) ? false : bRemove; /* Flag to note that the table is currently being destroyed - no action should be taken */ oSettings.bDestroying = true; /* Fire off the destroy callbacks for plug-ins etc */ _fnCallbackFire( oSettings, "aoDestroyCallback", "destroy", [oSettings] ); - - /* Restore hidden columns */ - for ( i=0, iLen=oSettings.aoColumns.length ; i