diff options
50 files changed, 261 insertions, 8584 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index a4eaaa2e..9ca880d0 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -29,7 +29,6 @@ import json import gzip import cPickle as pickle import itertools -from operator import itemgetter from redis import Redis Redis = Redis() @@ -316,9 +315,6 @@ class DatasetGroup(object): return mapping_id, mapping_names - def get_specified_markers(self, markers = []): - self.markers = HumanMarkers(self.name, markers) - def get_markers(self): logger.debug("self.species is:", self.species) @@ -449,7 +445,6 @@ def datasets(group_name, this_group = None): group_name, webqtlConfig.PUBLICTHRESH, "'" + group_name + "'", webqtlConfig.PUBLICTHRESH)) - #for tissue_name, dataset in itertools.groupby(the_results, itemgetter(0)): for dataset_item in the_results: tissue_name = dataset_item[0] dataset = dataset_item[1] @@ -457,14 +452,10 @@ def datasets(group_name, this_group = None): if tissue_name in ['#PublishFreeze', '#GenoFreeze']: dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) else: - dataset_sub_menu = [item[1:] for item in dataset] - tissue_already_exists = False - tissue_position = None for i, tissue_dict in enumerate(dataset_menu): if tissue_dict['tissue'] == tissue_name: tissue_already_exists = True - tissue_position = i break if tissue_already_exists: @@ -719,20 +710,6 @@ class PhenotypeDataSet(DataSet): # (Urgently?) Need to write this pass - def get_trait_list(self): - query = """ - select PublishXRef.Id - from PublishXRef, PublishFreeze - where PublishFreeze.InbredSetId=PublishXRef.InbredSetId - and PublishFreeze.Id = {} - """.format(escape(str(self.id))) - logger.sql(query) - results = g.db.execute(query).fetchall() - trait_data = {} - for trait in results: - trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) - return trait_data - def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: @@ -746,7 +723,7 @@ class PhenotypeDataSet(DataSet): #of the post-publication description if this_trait.confidential: this_trait.description_display = "" - continue # for now + continue # for now, because no authorization features if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( privilege=self.privilege, @@ -770,9 +747,7 @@ class PhenotypeDataSet(DataSet): #LRS and its location this_trait.LRS_score_repr = "N/A" - this_trait.LRS_score_value = 0 this_trait.LRS_location_repr = "N/A" - this_trait.LRS_location_value = 1000000 if this_trait.lrs: query = """ @@ -789,17 +764,7 @@ class PhenotypeDataSet(DataSet): LRS_Chr = result[0] LRS_Mb = result[1] - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = LRS_score_value = this_trait.lrs this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): @@ -861,40 +826,13 @@ class GenotypeDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) - def get_trait_list(self): - query = """ - select Geno.Name - from Geno, GenoXRef - where GenoXRef.GenoId = Geno.Id - and GenoFreezeId = {} - """.format(escape(str(self.id))) - logger.sql(query) - results = g.db.execute(query).fetchall() - trait_data = {} - for trait in results: - trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) - return trait_data - def get_trait_info(self, trait_list, species=None): for this_trait in trait_list: if not this_trait.haveinfo: this_trait.retrieveInfo() - #XZ: trait_location_value is used for sorting - trait_location_repr = 'N/A' - trait_location_value = 1000000 - if this_trait.chr and this_trait.mb: - try: - trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - except: - if this_trait.chr.upper() == 'X': - trait_location_value = 20*1000 + this_trait.mb - else: - trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) ) - this_trait.location_value = trait_location_value def retrieve_sample_data(self, trait): query = """ @@ -989,20 +927,6 @@ class MrnaAssayDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) - def get_trait_list_1(self): - query = """ - select ProbeSet.Name - from ProbeSet, ProbeSetXRef - where ProbeSetXRef.ProbeSetId = ProbeSet.Id - and ProbeSetFreezeId = {} - """.format(escape(str(self.id))) - logger.sql(query) - results = g.db.execute(query).fetchall() - trait_data = {} - for trait in results: - trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) - return trait_data - def get_trait_info(self, trait_list=None, species=''): # Note: setting trait_list to [] is probably not a great idea. @@ -1034,27 +958,8 @@ class MrnaAssayDataSet(DataSet): # Save it for the jinja2 template this_trait.description_display = description_display - #XZ: trait_location_value is used for sorting - trait_location_repr = 'N/A' - trait_location_value = 1000000 - if this_trait.chr and this_trait.mb: - #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y") - #This is so we can convert the location to a number used for sorting - trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb) - #try: - # trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - #except ValueError: - # if this_trait.chr.upper() == 'X': - # trait_location_value = 20*1000 + this_trait.mb - # else: - # trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 + - # this_trait.mb) - - #ZS: Put this in function currently called "convert_location_to_value" - this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, - float(this_trait.mb)) - this_trait.location_value = trait_location_value + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb)) #Get mean expression value query = ( @@ -1076,9 +981,7 @@ class MrnaAssayDataSet(DataSet): #LRS and its location this_trait.LRS_score_repr = 'N/A' - this_trait.LRS_score_value = 0 this_trait.LRS_location_repr = 'N/A' - this_trait.LRS_location_value = 1000000 #Max LRS and its Locus location if this_trait.lrs and this_trait.locus: @@ -1093,40 +996,10 @@ class MrnaAssayDataSet(DataSet): if result: lrs_chr, lrs_mb = result - #XZ: LRS_location_value is used for sorting - lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb) this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = this_trait.lrs this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb)) - - def convert_location_to_value(self, chromosome, mb): - try: - location_value = int(chromosome)*1000 + float(mb) - except ValueError: - if chromosome.upper() == 'X': - location_value = 20*1000 + float(mb) - else: - location_value = (ord(str(chromosome).upper()[0])*1000 + - float(mb)) - - return location_value - - def get_sequence(self): - query = """ - SELECT - ProbeSet.BlatSeq - FROM - ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSet.Id=ProbeSetXRef.ProbeSetId and - ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and - ProbeSet.Name = %s - ProbeSetFreeze.Name = %s - """ % (escape(self.name), escape(self.dataset.name)) - logger.sql(query) - results = g.db.execute(query).fetchone() - return results[0] + return trait_list def retrieve_sample_data(self, trait): query = """ @@ -1150,7 +1023,6 @@ class MrnaAssayDataSet(DataSet): #logger.debug("RETRIEVED RESULTS HERE:", results) return results - def retrieve_genes(self, column_name): query = """ select ProbeSet.Name, ProbeSet.%s @@ -1204,19 +1076,6 @@ class TempDataSet(DataSet): desc = self.handle_pca(desc) return desc - def get_group(self): - query = """ - SELECT - InbredSet.Name, InbredSet.Id - FROM - InbredSet, Temp - WHERE - Temp.InbredSetId = InbredSet.Id AND - Temp.Name = "%s" - """ % self.name - logger.sql(query) - self.group, self.group_id = g.db.execute(query).fetchone() - def retrieve_sample_data(self, trait): query = """ SELECT diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index eb836e6c..6fec5dcd 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -18,21 +18,11 @@ class MrnaAssayTissueData(object): def __init__(self, gene_symbols=None): self.gene_symbols = gene_symbols - self.have_data = False if self.gene_symbols == None: self.gene_symbols = [] - #print("self.gene_symbols:", self.gene_symbols) - self.data = collections.defaultdict(Bunch) - #self.gene_id_dict ={} - #self.data_id_dict = {} - #self.chr_dict = {} - #self.mb_dict = {} - #self.desc_dict = {} - #self.probe_target_desc_dict = {} - query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description from ( select Symbol, max(Mean) as maxmean @@ -52,7 +42,6 @@ class MrnaAssayTissueData(object): in_clause = db_tools.create_in_clause(gene_symbols) #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower - query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; @@ -67,9 +56,7 @@ class MrnaAssayTissueData(object): for result in results: symbol = result[0] - #if symbol.lower() in [gene_symbol.lower() for gene_symbol in gene_symbols]: if symbol.lower() in lower_symbols: - #gene_symbols.append(symbol) symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId @@ -79,8 +66,6 @@ class MrnaAssayTissueData(object): self.data[symbol].description = result.description self.data[symbol].probe_target_description = result.Probe_Target_Description - print("self.data: ", pf(self.data)) - ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, @@ -107,53 +92,4 @@ class MrnaAssayTissueData(object): else: symbol_values_dict[result.Symbol.lower()].append(result.value) - #for symbol in self.data: - # data_id = self.data[symbol].data_id - # symbol_values_dict[symbol] = self.get_tissue_values(data_id) - - - return symbol_values_dict - - - #def get_tissue_values(self, data_id): - # """Gets the tissue values for a particular gene""" - # - # tissue_values=[] - # - # query = """SELECT value, id - # FROM TissueProbeSetData - # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id)) - # - # #try : - # results = g.db.execute(query).fetchall() - # for result in results: - # tissue_values.append(result.value) - # #symbol_values_dict[symbol] = value_list - # #except: - # # symbol_values_pairs[symbol] = None - # - # return tissue_values - -######################################################################################################## -#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol -#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. -# key is symbol, value is one list of expression values of one probeSet. -#function: wrapper function for getSymbolValuePairDict function -# build gene symbol list if necessary, cut it into small lists if necessary, -# then call getSymbolValuePairDict function and merge the results. -######################################################################################################## - -#def get_trait_symbol_and_tissue_values(symbol_list=None): -# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) -# -# #symbolList, -# #geneIdDict, -# #dataIdDict, -# #ChrDict, -# #MbDict, -# #descDict, -# #pTargetDescDict = getTissueProbeSetXRefInfo( -# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) -# -# if len(tissue_data.gene_symbols): -# return get_symbol_values_pairs(tissue_data) + return symbol_values_dict
\ No newline at end of file diff --git a/wqflask/base/species.py b/wqflask/base/species.py index ce763fc3..4ac2213c 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -18,19 +18,6 @@ class TheSpecies(object): self.dataset = dataset #print("self.dataset is:", pf(self.dataset.__dict__)) self.chromosomes = Chromosomes(self.dataset) - self.genome_mb_length = self.chromosomes.get_genome_mb_length() - - #@property - #def chromosomes(self): - # chromosomes = [("All", -1)] - # - # for counter, genotype in enumerate(self.dataset.group.genotype): - # if len(genotype) > 1: - # chromosomes.append((genotype.name, counter)) - # - # print("chromosomes is: ", pf(chromosomes)) - # - # return chromosomes class IndChromosome(object): def __init__(self, name, length): @@ -42,16 +29,11 @@ class IndChromosome(object): """Chromosome length in megabases""" return self.length / 1000000 - def set_cm_length(self, genofile_chr): - self.cm_length = genofile_chr[-1].cM - genofile_chr[0].cM - - class Chromosomes(object): def __init__(self, dataset): self.dataset = dataset self.chromosomes = collections.OrderedDict() - query = """ Select Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet @@ -64,64 +46,4 @@ class Chromosomes(object): results = g.db.execute(query).fetchall() for item in results: - self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) - - self.set_mb_graph_interval() - #self.get_cm_length_list() - - - def set_mb_graph_interval(self): - """Empirical megabase interval""" - - if self.chromosomes: - self.mb_graph_interval = self.get_genome_mb_length()/(len(self.chromosomes)*12) - else: - self.mb_graph_interval = 1 - - #if self.chromosomes: - #assert self.chromosomes, "Have to add some code back in apparently to set it to 1" - #self.mb_graph_interval = self.get_genome_mb_length()/(len(self.chromosomes)*12) - #else: - #self.mb_graph_interval = 1 - - - def get_genome_mb_length(self): - """Gets the sum of each chromosome's length in megabases""" - - return sum([ind_chromosome.mb_length for ind_chromosome in self.chromosomes.values()]) - - - def get_genome_cm_length(self): - """Gets the sum of each chromosome's length in centimorgans""" - - return sum([ind_chromosome.cm_length for ind_chromosome in self.chromosomes.values()]) - - def get_cm_length_list(self): - """Chromosome length in centimorgans - - Calculates the length in centimorgans by subtracting the centimorgan position - of the last marker in a chromosome by the position of the first marker - - """ - - self.dataset.group.read_genotype_file() - - self.cm_length_list = [] - - for chromosome in self.dataset.group.genotype: - self.cm_length_list.append(chromosome[-1].cM - chromosome[0].cM) - - print("self.cm_length_list:", pf(self.cm_length_list)) - - assert len(self.cm_length_list) == len(self.chromosomes), "Uh-oh lengths should be equal!" - for counter, chromosome in enumerate(self.chromosomes.values()): - chromosome.cm_length = self.cm_length_list[counter] - #self.chromosomes[counter].cm_length = item - - for key, value in self.chromosomes.items(): - print("bread - %s: %s" % (key, pf(vars(value)))) - - -# Testing -#if __name__ == '__main__': -# foo = dict(bar=dict(length)) + self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
\ No newline at end of file diff --git a/wqflask/base/template.py b/wqflask/base/template.py deleted file mode 100644 index aa8f90dc..00000000 --- a/wqflask/base/template.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -template = """ -<?XML VERSION="1.0" ENCODING="UTF-8"> -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<TITLE>%s</TITLE> - -<META http-equiv=Content-Type content="text/html; charset=iso-8859-1"> -<META NAME="keywords" CONTENT="genetics, bioinformatics, genome, phenome, gene expression, complex trait analysis, gene mapping, SNP, quantitative trait locus QTL, expression eQTL, WebQTL, Traitnet, Traitnetwork, personalized medicine"> -<META NAME="description" CONTENT ="GeneNetwork is a free scientific web resource used to study relationships between differences in genes, environmental factors, phenotypes, and disease risk." > -<META NAME="author" CONTENT ="GeneNetwork developers" > -<META NAME="geo.placename" CONTENT ="Memphis, TN" > -<META NAME="geo.region" CONTENT="US-TN"> -%s -<LINK REL="stylesheet" TYPE="text/css" HREF='/css/general.css'> -<LINK REL="stylesheet" TYPE="text/css" HREF='/css/menu.css'> -<link rel="stylesheet" media="all" type="text/css" href="/css/tabbed_pages.css" /> -<LINK REL="apple-touch-icon" href="/images/ipad_icon3.png" /> -<link type="text/css" href='/css/custom-theme/jquery-ui-1.8.12.custom.css' rel='Stylesheet' /> -<link type="text/css" href='/css/tab_style.css' rel='Stylesheet' /> - -<script type="text/javascript" src="/javascript/jquery-1.5.2.min.js"></script> -<SCRIPT SRC="/javascript/webqtl.js"></SCRIPT> -<SCRIPT SRC="/javascript/dhtml.js"></SCRIPT> -<SCRIPT SRC="/javascript/tablesorter.js"></SCRIPT> -<SCRIPT SRC="/javascript/jqueryFunction.js"></SCRIPT> -<script src="/javascript/tabbed_pages.js" type="text/javascript"></script> -<script src="/javascript/jquery-ui-1.8.12.custom.min.js" type="text/javascript"></script> -%s - -<script type="text/javascript"> - var _gaq = _gaq || []; - _gaq.push(['_setAccount', 'UA-3782271-1']); - _gaq.push(['_trackPageview']); - (function() { - var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; - ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; - var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); - })(); -</script> -</HEAD> -<BODY bottommargin="2" leftmargin="2" rightmargin="2" topmargin="2" text=#000000 bgColor=#ffffff %s> -%s -<TABLE cellSpacing=5 cellPadding=4 width="100%%" border=0> - <TBODY> - <!-- Start of header --> - <TR> - %s - </TR> - <!-- End of header --> - - <!-- Start of body --> - <TR> - <TD bgColor=#eeeeee class="solidBorder"> - <Table width= "100%%" cellSpacing=0 cellPadding=5> - <TR> - %s - </TR> - </TABLE> - </TD> - </TR> - <!-- End of body --> - - <!-- Start of footer --> - <TR> - <TD align=center bgColor=#ddddff class="solidBorder"> - <TABLE width="90%%">%s</table> - </td> - </TR> - <!-- End of footer --> -</TABLE> - -<!-- menu script itself. you should not modify this file --> -<script language="JavaScript" src="/javascript/menu_new.js"></script> -<!-- items structure. menu hierarchy and links are stored there --> -<script language="JavaScript" src="/javascript/menu_items.js"></script> -<!-- files with geometry and styles structures --> -<script language="JavaScript" src="/javascript/menu_tpl.js"></script> -<script language="JavaScript"> - <!--// - // Note where menu initialization block is located in HTML document. - // Don't try to position menu locating menu initialization block in - // some table cell or other HTML element. Always put it before </body> - // each menu gets two parameters (see demo files) - // 1. items structure - // 2. geometry structure - new menu (MENU_ITEMS, MENU_POS); - // make sure files containing definitions for these variables are linked to the document - // if you got some javascript error like "MENU_POS is not defined", then you've made syntax - // error in menu_tpl.js file or that file isn't linked properly. - - // also take a look at stylesheets loaded in header in order to set styles - //--> -</script> -</BODY> -</HTML> -""" diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index acc055d8..b71dacf6 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -25,10 +25,6 @@ logger = getLogger(__name__ ) from wqflask import user_manager -def print_mem(stage=""): - mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - print("{}: {}".format(stage, mem/1024)) - class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -63,9 +59,7 @@ class GeneralTrait(object): self.symbol = None self.LRS_score_repr = "N/A" - self.LRS_score_value = 0 self.LRS_location_repr = "N/A" - self.LRS_location_value = 1000000 if kw.get('fullname'): name2 = value.split("::") @@ -82,90 +76,6 @@ class GeneralTrait(object): if get_sample_info != False: self = retrieve_sample_data(self, self.dataset) - - def get_name(self): - stringy = "" - if self.dataset and self.name: - stringy = "%s::%s" % (self.dataset, self.name) - if self.cellid: - stringy += "::" + self.cellid - else: - stringy = self.description - return stringy - - - def get_given_name(self): - """ - when user enter a trait or GN generate a trait, user want show the name - not the name that generated by GN randomly, the two follow function are - used to give the real name and the database. displayName() will show the - database also, getGivenName() just show the name. - For other trait, displayName() as same as getName(), getGivenName() as - same as self.name - - Hongqiang 11/29/07 - - """ - stringy = self.name - if self.dataset and self.name: - desc = self.dataset.get_desc() - if desc: - #desc = self.handle_pca(desc) - stringy = desc - return stringy - - - def display_name(self): - stringy = "" - if self.dataset and self.name: - desc = self.dataset.get_desc() - #desc = self.handle_pca(desc) - if desc: - #desc = self.handle_pca(desc) - #stringy = desc - #if desc.__contains__('PCA'): - # desc = desc[desc.rindex(':')+1:].strip() - #else: - # desc = desc[:desc.index('entered')].strip() - #desc = self.handle_pca(desc) - stringy = "%s::%s" % (self.dataset, desc) - else: - stringy = "%s::%s" % (self.dataset, self.name) - if self.cellid: - stringy += "::" + self.cellid - else: - stringy = self.description - - return stringy - - - #def __str__(self): - # #return "%s %s" % (self.getName(), self.group) - # return self.getName() - #__str__ = getName - #__repr__ = __str__ - - def export_data(self, samplelist, the_type="val"): - """ - export data according to samplelist - mostly used in calculating correlation - - """ - result = [] - for sample in samplelist: - if self.data.has_key(sample): - if the_type=='val': - result.append(self.data[sample].val) - elif the_type=='var': - result.append(self.data[sample].var) - elif the_type=='N': - result.append(self.data[sample].N) - else: - raise KeyError, `the_type`+' the_type is incorrect.' - else: - result.append(None) - return result - def export_informative(self, include_variance=0): """ export informative sample @@ -185,19 +95,6 @@ class GeneralTrait(object): sample_aliases.append(sample_data.name2) return samples, vals, the_vars, sample_aliases - - @property - def name_header_fmt(self): - '''Return a human-readable name for use in page header''' - if self.dataset.type == 'ProbeSet': - return self.symbol - elif self.dataset.type == 'Geno': - return self.name - elif self.dataset.type == 'Publish': - return self.post_publication_abbreviation - else: - return "unnamed" - @property def description_fmt(self): '''Return a text formated description''' @@ -252,29 +149,6 @@ class GeneralTrait(object): fmt += (' on the minus strand ') return fmt - -# In ProbeSet, there are maybe several annotations match one sequence -# so we need use sequence(BlatSeq) as the identification, when we update -# one annotation, we update the others who match the sequence also. -# -# Hongqiang Li, 3/3/2008 -def getSequence(trait, dataset_name): - dataset = create_dataset(dataset_name) - - if dataset.type == 'ProbeSet': - results = g.db.execute(''' - SELECT - ProbeSet.BlatSeq - FROM - ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSet.Id=ProbeSetXRef.ProbeSetId and - ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and - ProbeSet.Name = %s - ProbeSetFreeze.Name = %s - ''', trait.name, dataset.name).fetchone() - - return results[0] def retrieve_sample_data(trait, dataset, samplelist=None): if samplelist == None: @@ -293,18 +167,6 @@ def retrieve_sample_data(trait, dataset, samplelist=None): if not samplelist or (samplelist and name in samplelist): trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) return trait - -def convert_location_to_value(chromosome, mb): - try: - location_value = int(chromosome)*1000 + float(mb) - except ValueError: - if chromosome.upper() == 'X': - location_value = 20*1000 + float(mb) - else: - location_value = (ord(str(chromosome).upper()[0])*1000 + - float(mb)) - - return location_value @app.route("/trait/get_sample_data") def get_sample_data(): @@ -542,38 +404,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if trait.pubmed_id: trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id - - trait.homologeneid = None if dataset.type == 'ProbeSet' and dataset.group: - if trait.geneid: - #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. - #XZ: So I have to test if geneid is number before execute the query. - #XZ: The geneid values in database should be cleaned up. - #try: - # float(self.geneid) - # geneidIsNumber = True - #except ValueError: - # geneidIsNumber = False - #if geneidIsNumber: - query = """ - SELECT - HomologeneId - FROM - Homologene, Species, InbredSet - WHERE - Homologene.GeneId ='%s' AND - InbredSet.Name = '%s' AND - InbredSet.SpeciesId = Species.Id AND - Species.TaxonomyId = Homologene.TaxonomyId - """ % (escape(str(trait.geneid)), escape(dataset.group.name)) - logger.sql(query) - result = g.db.execute(query).fetchone() - #else: - # result = None - - if result: - trait.homologeneid = result[0] - description_string = unicode(str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') target_string = unicode(str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') @@ -589,46 +420,19 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): # Save it for the jinja2 template trait.description_display = description_display - #XZ: trait_location_value is used for sorting trait.location_repr = 'N/A' - trait.location_value = 1000000 - if trait.chr and trait.mb: - #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y") - #This is so we can convert the location to a number used for sorting - trait_location_value = convert_location_to_value(trait.chr, trait.mb) - #try: - # trait_location_value = int(self.chr)*1000 + self.mb - #except ValueError: - # if self.chr.upper() == 'X': - # trait_location_value = 20*1000 + self.mb - # else: - # trait_location_value = (ord(str(self.chr).upper()[0])*1000 + - # self.mb) - - #ZS: Put this in function currently called "convert_location_to_value" trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) - trait.location_value = trait_location_value elif dataset.type == "Geno": trait.location_repr = 'N/A' - trait.location_value = 1000000 - if trait.chr and trait.mb: - #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y") - #This is so we can convert the location to a number used for sorting - trait_location_value = convert_location_to_value(trait.chr, trait.mb) - - #ZS: Put this in function currently called "convert_location_to_value" trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) - trait.location_value = trait_location_value if get_qtl_info: #LRS and its location trait.LRS_score_repr = "N/A" - trait.LRS_score_value = 0 trait.LRS_location_repr = "N/A" - trait.LRS_location_value = 1000000 if dataset.type == 'ProbeSet' and not trait.cellid: query = """ SELECT @@ -699,19 +503,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.locus = trait.lrs = trait.additive = "" if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and trait.locus_chr != "" and trait.locus_mb != "": - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(trait.locus_chr)*1000 + float(trait.locus_mb) - except: - if trait.locus_chr.upper() == 'X': - LRS_location_value = 20*1000 + float(trait.locus_mb) - else: - LRS_location_value = ord(str(trait.locus_chr).upper()[0])*1000 + float(trait.locus_mb) - trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (trait.locus_chr, float(trait.locus_mb)) if trait.lrs != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs - trait.LRS_score_value = LRS_score_value = trait.lrs else: raise KeyError, `trait.name`+' information is not found in the database.' diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 1e66e957..4708bf0a 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -20,46 +20,20 @@ USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4} #minimum number of informative strains KMININFORMATIVE = 5 -#maximum number of traits for interval mapping -MULTIPLEMAPPINGLIMIT = 11 - -#maximum number of traits for correlation -MAXCORR = 100 - #Daily download limit from one IP DAILYMAXIMUM = 1000 #maximum LRS value MAXLRS = 460.0 -#temporary data life span -MAXLIFE = 86400 - #MINIMUM Database public value PUBLICTHRESH = 0 -#NBCI address -NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" -UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s" -GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s" -OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s" -UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"; -HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=homologene&Cmd=DetailsSearch&Term=%s" +#EXTERNAL LINK ADDRESSES PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract" -UCSC_POS = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=mammal&org=%s&db=%s&position=chr%s:%s-%s&pix=800&Submit=submit" UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' -UCSC_GENOME = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=http://web2qtl.utmem.edu:88/snp/chr%s" -ENSEMBLE_BLAT = 'http://www.ensembl.org/Mus_musculus/featureview?type=AffyProbe&id=%s' -DBSNP = 'http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s' -UCSC_RUDI_TRACK_URL = " http://genome.cse.ucsc.edu/cgi-bin/hgTracks?org=%s&db=%s&hgt.customText=http://gbic.biol.rug.nl/~ralberts/tracks/%s/%s" -GENOMEBROWSER_URL="http://ucscbrowser.genenetwork.org/cgi-bin/hgTracks?clade=mammal&org=Mouse&db=mm9&position=%s&hgt.suggest=&pix=800&Submit=submit" -ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?species=Mus_musculus;idx=Transcript;end=1;q=%s" - -# The following paths are no longer in use! -# HTMLPATH is replaced by GENODIR -# IMGDIR is replaced by GENERATED_IMAGE_DIR # Temporary storage (note that this TMPDIR can be set as an # environment variable - use utility.tools.TEMPDIR when you diff --git a/wqflask/base/webqtlFormData.py b/wqflask/base/webqtlFormData.py deleted file mode 100644 index 10251756..00000000 --- a/wqflask/base/webqtlFormData.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -#from mod_python import Cookie - -from __future__ import print_function -from pprint import pformat as pf - -import string -import os - -import reaper - -import webqtlConfig -from webqtlCaseData import webqtlCaseData -from utility import webqtlUtil - -class webqtlFormData(object): - 'Represents data from a WebQTL form page, needed to generate the next page' - - attrs = ('formID','group','genotype','samplelist','allsamplelist', 'display_variance' - 'suggestive','significance','submitID','identification', 'enablevariance', - 'nperm','nboot','email','incparentsf1','genotype_1','genotype_2','traitInfo') - - #XZ: Attention! All attribute values must be picklable! - - def __init__(self, - start_vars = None, - req = None, - mod_python_session=None, - FieldStorage_formdata=None): - # Todo: rework this whole thing - print("in webqtlFormData start_vars are:", pf(start_vars)) - for item in webqtlFormData.attrs: - self.__dict__[item] = None - - #ZS: This is only used in DataEditingPage.py (as far as I know) - self.varianceDispName = None - - for item in start_vars: - self.__dict__[item] = start_vars[item] - print(" Now self.dict is:", pf(self.__dict__)) - - #Todo: This can't be good below...rework - try: - self.remote_ip = req.connection.remote_ip - except: - self.remote_ip = '1.2.3.4' - - if req and req.headers_in.has_key('referer'): - self.refURL = req.headers_in['referer'] - else: - self.refURL = None - - # For now let's just comment all this out - Sam - - #self.cookies = cookieData.cookieData(Cookie.get_cookies(req)) #XZ: dictionary type. To hold values transfered from mod_python Cookie. - # - ##XZ: dictionary type. To hold values transfered from mod_python Session object. We assume that it is always picklable. - #self.input_session_data = sessionData.sessionData( mod_python_session ) - # - ##XZ: FieldStorage_formdata may contain item that can't be pickled. Must convert to picklable data. - #self.formdata = cgiData( FieldStorage_formdata ) - # - ##get Form ID - #self.formID = self.formdata.getfirst('FormID') - # - ##get rest of the attributes - #if self.formID: - # for item in self.attrs: - # value = self.formdata.getfirst(item) - # if value != None: - # setattr(self,item,string.strip(value)) - - self.ppolar = None - self.mpolar = None - - print("[yellow] self.group is:", self.group) - if self.group: - #try: - # # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py; - _f1, _f12, self.mpolar, self.ppolar = webqtlUtil.ParInfo[self.group] - #except: - # f1 = f12 = self.mpolar = self.ppolar = None - - - def set_number(stringy): - return int(stringy) if stringy else 2000 # Rob asked to change the default value to 2000 - - self.nperm = set_number(self.nperm) - self.nboot = set_number(self.nboot) - - - #if self.allsamplelist: - # self.allsamplelist = map(string.strip, string.split(self.allsamplelist)) - print("self.allsamplelist is:", self.allsamplelist) - if self.allsamplelist: - self.allsamplelist = self.allsamplelist.split() - print("now self.allsamplelist is:", self.allsamplelist) - #self.readGenotype() - #self.readData() - - if self.group == 'BXD300': - self.group = 'BXD' - - - def __getitem__(self, key): - print("in __getitem__") - return self.__dict__[key] - - def get(self, key, default=None): - if key in self.__dict__: - return self.__dict__[key] - else: - return default - - def __str__(self): - rstr = '' - for item in self.attrs: - if item != 'genotype': - rstr += '%s:%s\n' % (item,str(getattr(self,item))) - return rstr - - - def readGenotype(self): - '''read genotype from .geno file''' - if self.group == 'BXD300': - self.group = 'BXD' - - assert self.group, "self.group needs to be set" - - #genotype_1 is Dataset Object without parents and f1 - #genotype_2 is Dataset Object with parents and f1 (not for intercross) - - self.genotype_1 = reaper.Dataset() - - full_filename = locate(self.group + '.geno','genotype') - - # reaper barfs on unicode filenames, so here we ensure it's a string - full_filename = str(full_filename) - self.genotype_1.read(full_filename) - - print("Got to after read") - - try: - # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py; - _f1, _f12, _mat, _pat = webqtlUtil.ParInfo[self.group] - except KeyError: - _f1 = _f12 = _mat = _pat = None - - self.genotype_2 = self.genotype_1 - if self.genotype_1.type == "group" and _mat and _pat: - self.genotype_2 = self.genotype_1.add(Mat=_mat, Pat=_pat) #, F1=_f1) - - #determine default genotype object - if self.incparentsf1 and self.genotype_1.type != "intercross": - self.genotype = self.genotype_2 - else: - self.incparentsf1 = 0 - self.genotype = self.genotype_1 - - self.samplelist = list(self.genotype.prgy) - self.f1list = [] - self.parlist = [] - - if _f1 and _f12: - self.f1list = [_f1, _f12] - if _mat and _pat: - self.parlist = [_mat, _pat] - - - def readData(self, samplelist, incf1=None): - '''read user input data or from trait data and analysis form''' - - if incf1 == None: - incf1 = [] - - if not self.genotype: - self.readGenotype() - if not samplelist: - if incf1: - samplelist = self.f1list + self.samplelist - else: - samplelist = self.samplelist - - #print("before traitfiledata self.traitfile is:", pf(self.traitfile)) - - traitfiledata = getattr(self, "traitfile", None) - traitpastedata = getattr(self, "traitpaste", None) - variancefiledata = getattr(self, "variancefile", None) - variancepastedata = getattr(self, "variancepaste", None) - Nfiledata = getattr(self, "Nfile", None) - - #### Todo: Rewrite below when we get to someone submitting their own trait ##### - - def to_float(item): - try: - return float(item) - except ValueError: - return None - - print("bottle samplelist is:", samplelist) - if traitfiledata: - tt = traitfiledata.split() - values = map(webqtlUtil.StringAsFloat, tt) - elif traitpastedata: - tt = traitpastedata.split() - values = map(webqtlUtil.StringAsFloat, tt) - else: - print("mapping formdataasfloat") - #values = map(self.FormDataAsFloat, samplelist) - values = [to_float(getattr(self, key)) for key in samplelist] - print("rocket values is:", values) - - - if len(values) < len(samplelist): - values += [None] * (len(samplelist) - len(values)) - elif len(values) > len(samplelist): - values = values[:len(samplelist)] - print("now values is:", values) - - - if variancefiledata: - tt = variancefiledata.split() - variances = map(webqtlUtil.StringAsFloat, tt) - elif variancepastedata: - tt = variancepastedata.split() - variances = map(webqtlUtil.StringAsFloat, tt) - else: - variances = map(self.FormVarianceAsFloat, samplelist) - - if len(variances) < len(samplelist): - variances += [None]*(len(samplelist) - len(variances)) - elif len(variances) > len(samplelist): - variances = variances[:len(samplelist)] - - if Nfiledata: - tt = string.split(Nfiledata) - nsamples = map(webqtlUtil.IntAsFloat, tt) - if len(nsamples) < len(samplelist): - nsamples += [None]*(len(samplelist) - len(nsamples)) - else: - nsamples = map(self.FormNAsFloat, samplelist) - - ##values, variances, nsamples is obsolete - self.allTraitData = {} - for i, _sample in enumerate(samplelist): - if values[i] != None: - self.allTraitData[_sample] = webqtlCaseData( - _sample, values[i], variances[i], nsamples[i]) - print("allTraitData is:", pf(self.allTraitData)) - - - - def informativeStrains(self, samplelist=None, include_variances = None): - '''if readData was called, use this to output informative samples (sample with values)''' - - if not samplelist: - samplelist = self.samplelist - - samples = [] - values = [] - variances = [] - - #print("self.allTraitData is:", pf(self.allTraitData)) - - for sample in samplelist: - if sample in self.allTraitData: - _val, _var = self.allTraitData[sample].value, self.allTraitData[sample].variance - if _val != None: - if include_variances: - if _var != None: - samples.append(sample) - values.append(_val) - variances.append(_var) - else: - samples.append(sample) - values.append(_val) - variances.append(None) - - return samples, values, variances, len(samples) - - - - #def FormDataAsFloat(self, key): - # - # #try: - # # return float(self.key) - # #except: - # # return None - - - def FormVarianceAsFloat(self, key): - try: - return float(self.formdata.getfirst('V' + key)) - except: - return None - - def FormNAsFloat(self, key): - try: - return int(self.formdata.getfirst('N' + key)) - except: - return None - - def Sample(self): - 'Create some dummy data for testing' - self.group = 'BXD' - self.incparentsf1 = 'on' - #self.display = 9.2 - #self.significance = 16.1 - self.readGenotype() - self.identification = 'BXD : Coat color example by Lu Lu, et al' - #self.readGenotype() - #self.genotype.ReadMM('AXBXAforQTL') - #self.samplelist = map((lambda x, y='': '%s%s' % (y,x)), self.genotype.prgy) - #self.samplelist.sort() - self.allTraitData = {'BXD29': webqtlCaseData(3), 'BXD28': webqtlCaseData(2), - 'BXD25': webqtlCaseData(2), 'BXD24': webqtlCaseData(2), 'BXD27': webqtlCaseData(2), - 'BXD21': webqtlCaseData(1), 'BXD20': webqtlCaseData(4), 'BXD23': webqtlCaseData(4), - 'BXD22': webqtlCaseData(3), 'BXD14': webqtlCaseData(4), 'BXD15': webqtlCaseData(2), - 'BXD16': webqtlCaseData(3), 'BXD11': webqtlCaseData(4), 'BXD12': webqtlCaseData(3), - 'BXD13': webqtlCaseData(2), 'BXD18': webqtlCaseData(3), 'BXD19': webqtlCaseData(3), - 'BXD38': webqtlCaseData(3), 'BXD39': webqtlCaseData(3), 'BXD36': webqtlCaseData(2), - 'BXD34': webqtlCaseData(4), 'BXD35': webqtlCaseData(4), 'BXD32': webqtlCaseData(4), - 'BXD33': webqtlCaseData(3), 'BXD30': webqtlCaseData(1), 'BXD31': webqtlCaseData(4), - 'DBA/2J': webqtlCaseData(1), 'BXD8': webqtlCaseData(3), 'BXD9': webqtlCaseData(1), - 'BXD6': webqtlCaseData(3), 'BXD5': webqtlCaseData(3), 'BXD2': webqtlCaseData(4), - 'BXD1': webqtlCaseData(1), 'C57BL/6J': webqtlCaseData(4), 'B6D2F1': webqtlCaseData(4), - 'BXD42': webqtlCaseData(4), 'BXD40': webqtlCaseData(3)} diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py deleted file mode 100644 index 1e5646a1..00000000 --- a/wqflask/basicStatistics/BasicStatisticsFunctions.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import print_function - -#import string -from math import * -#import piddle as pid -#import os -import traceback - -from pprint import pformat as pf - -from corestats import Stats - -import reaper -from htmlgen import HTMLgen2 as HT - -#from utility import Plot -from utility import webqtlUtil -from base import webqtlConfig -from db import webqtlDatabaseFunction - -def basicStatsTable(vals, trait_type=None, cellid=None, heritability=None): - print("basicStatsTable called - len of vals", len(vals)) - st = {} # This is the dictionary where we'll put everything for the template - valsOnly = [] - dataXZ = vals[:] - for i in range(len(dataXZ)): - valsOnly.append(dataXZ[i][1]) - - (st['traitmean'], - st['traitmedian'], - st['traitvar'], - st['traitstdev'], - st['traitsem'], - st['N']) = reaper.anova(valsOnly) #ZS: Should convert this from reaper to R in the future - - #tbl = HT.TableLite(cellpadding=20, cellspacing=0) - #dataXZ = vals[:] - dataXZ = sorted(vals, webqtlUtil.cmpOrder) - - print("data for stats is:", pf(dataXZ)) - for num, item in enumerate(dataXZ): - print(" %i - %s" % (num, item)) - print(" length:", len(dataXZ)) - - st['min'] = dataXZ[0][1] - st['max'] = dataXZ[-1][1] - - numbers = [x[1] for x in dataXZ] - stats = Stats(numbers) - - at75 = stats.percentile(75) - at25 = stats.percentile(25) - print("should get a stack") - traceback.print_stack() - print("Interquartile:", at75 - at25) - - #tbl.append(HT.TR(HT.TD("Statistic",align="left", Class="fs14 fwb ffl b1 cw cbrb", width = 180), - # HT.TD("Value", align="right", Class="fs14 fwb ffl b1 cw cbrb", width = 60))) - #tbl.append(HT.TR(HT.TD("N of Samples",align="left", Class="fs13 b1 cbw c222"), - # HT.TD(N,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Mean",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitmean,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Median",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitmedian,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - ##tbl.append(HT.TR(HT.TD("Variance",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - ## HT.TD("%2.3f" % traitvar,nowrap="yes",align="left", Class="fs13 b1 cbw c222"))) - #tbl.append(HT.TR(HT.TD("Standard Error (SE)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitsem,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Standard Deviation (SD)", align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitstdev,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Minimum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%s" % dataXZ[0][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Maximum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%s" % dataXZ[-1][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - - - - if (trait_type != None and trait_type == 'ProbeSet'): - #tbl.append(HT.TR(HT.TD("Range (log2)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % (dataXZ[-1][1]-dataXZ[0][1]),nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD(HT.Span("Range (fold)"),align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.2f" % pow(2.0,(dataXZ[-1][1]-dataXZ[0][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD(HT.Span(HT.Href(url="/glossary.html#Interquartile", target="_blank", text="Interquartile Range", Class="non_bold")), align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.2f" % pow(2.0,(dataXZ[int((N-1)*3.0/4.0)][1]-dataXZ[int((N-1)/4.0)][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - st['range_log2'] = dataXZ[-1][1]-dataXZ[0][1] - st['range_fold'] = pow(2.0, (dataXZ[-1][1]-dataXZ[0][1])) - st['interquartile'] = pow(2.0, (dataXZ[int((st['N']-1)*3.0/4.0)][1]-dataXZ[int((st['N']-1)/4.0)][1])) - - #XZ, 04/01/2009: don't try to get H2 value for probe. - if not cellid: - if heritability: - # This field needs to still be put into the Jinja2 template - st['heritability'] = heritability - #tbl.append(HT.TR(HT.TD(HT.Span("Heritability"),align="center", Class="fs13 b1 cbw c222",nowrap="yes"),HT.TD("%s" % heritability, nowrap="yes",align="center", Class="fs13 b1 cbw c222"))) - - # Lei Yan - # 2008/12/19 - - return st - -def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specialStrains=[None], size=(750,500)): - - dataXZ = vals[:] - dataXZ.sort(webqtlUtil.cmpOrder) - dataLabel = [] - dataX = map(lambda X: X[1], dataXZ) - - showLabel = showstrains - if len(dataXZ) > 50: - showLabel = 0 - for item in dataXZ: - strainName = webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=item[0]) - dataLabel.append(strainName) - - dataY=Plot.U(len(dataX)) - dataZ=map(Plot.inverseCumul,dataY) - c = pid.PILCanvas(size=(750,500)) - Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel) - - filename= webqtlUtil.genRandStr("nP_") - c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - - img=HT.Image('/image/'+filename+'.gif',border=0) - - return img - -def plotBoxPlot(vals): - - valsOnly = [] - dataXZ = vals[:] - for i in range(len(dataXZ)): - valsOnly.append(dataXZ[i][1]) - - plotHeight = 320 - plotWidth = 220 - xLeftOffset = 60 - xRightOffset = 40 - yTopOffset = 40 - yBottomOffset = 60 - - canvasHeight = plotHeight + yTopOffset + yBottomOffset - canvasWidth = plotWidth + xLeftOffset + xRightOffset - canvas = pid.PILCanvas(size=(canvasWidth,canvasHeight)) - XXX = [('', valsOnly[:])] - - Plot.plotBoxPlot(canvas, XXX, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), XLabel= "Trait") - filename= webqtlUtil.genRandStr("Box_") - canvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - img=HT.Image('/image/'+filename+'.gif',border=0) - - plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13")) - - return img, plotLink - -def plotBarGraph(identification='', RISet='', vals=None, type="name"): - - this_identification = "unnamed trait" - if identification: - this_identification = identification - - if type=="rank": - dataXZ = vals[:] - dataXZ.sort(webqtlUtil.cmpOrder) - title='%s' % this_identification - else: - dataXZ = vals[:] - title='%s' % this_identification - - tvals = [] - tnames = [] - tvars = [] - for i in range(len(dataXZ)): - tvals.append(dataXZ[i][1]) - tnames.append(webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=dataXZ[i][0])) - tvars.append(dataXZ[i][2]) - nnStrain = len(tnames) - - sLabel = 1 - - ###determine bar width and space width - if nnStrain < 20: - sw = 4 - elif nnStrain < 40: - sw = 3 - else: - sw = 2 - - ### 700 is the default plot width minus Xoffsets for 40 strains - defaultWidth = 650 - if nnStrain > 40: - defaultWidth += (nnStrain-40)*10 - defaultOffset = 100 - bw = int(0.5+(defaultWidth - (nnStrain-1.0)*sw)/nnStrain) - if bw < 10: - bw = 10 - - plotWidth = (nnStrain-1)*sw + nnStrain*bw + defaultOffset - plotHeight = 500 - #print [plotWidth, plotHeight, bw, sw, nnStrain] - c = pid.PILCanvas(size=(plotWidth,plotHeight)) - Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw) - - filename= webqtlUtil.genRandStr("Bar_") - c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - img=HT.Image('/image/'+filename+'.gif',border=0) - - return img diff --git a/wqflask/basicStatistics/__init__.py b/wqflask/basicStatistics/__init__.py deleted file mode 100644 index e69de29b..00000000 --- a/wqflask/basicStatistics/__init__.py +++ /dev/null diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 05006d5c..45522705 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -17,17 +17,12 @@ import glob import traceback import gzip -#import numpy as np -#from pyLMM import lmm - import simplejson as json from pprint import pformat as pf class EmptyConfigurations(Exception): pass - - class Marker(object): def __init__(self): self.name = None @@ -39,47 +34,34 @@ class Marker(object): class ConvertGenoFile(object): def __init__(self, input_file, output_files): - self.input_file = input_file self.output_files = output_files - + self.mb_exists = False self.cm_exists = False self.markers = [] - + self.latest_row_pos = None self.latest_col_pos = None - + self.latest_row_value = None self.latest_col_value = None - - def convert(self): + def convert(self): self.haplotype_notation = { '@mat': "1", '@pat': "0", '@het': "0.5", '@unk': "NA" } - + self.configurations = {} - #self.skipped_cols = 3 - - #if self.input_file.endswith(".geno.gz"): - # print("self.input_file: ", self.input_file) - # self.input_fh = gzip.open(self.input_file) - #else: self.input_fh = open(self.input_file) - - with open(self.output_files[0], "w") as self.geno_fh: - #if self.file_type == "geno": - self.process_csv() - #elif self.file_type == "snps": - # self.process_snps_file() + self.process_csv() def process_csv(self): - for row_count, row in enumerate(self.process_rows()): + for row in self.process_rows(): row_items = row.split("\t") this_marker = Marker() @@ -102,53 +84,30 @@ class ConvertGenoFile(object): this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) else: this_marker.genotypes.append("NA") - - #print("this_marker is:", pf(this_marker.__dict__)) - #if this_marker.chr == "14": + self.markers.append(this_marker.__dict__) self.write_to_bimbam() - - # with open(self.output_file, 'w') as fh: - # json.dump(self.markers, fh, indent=" ", sort_keys=True) - - # print('configurations:', str(configurations)) - #self.latest_col_pos = item_count + self.skipped_cols - #self.latest_col_value = item - - #if item_count != 0: - # self.output_fh.write(" ") - #self.output_fh.write(self.configurations[item.upper()]) - - #self.output_fh.write("\n") def write_to_bimbam(self): with open(self.output_files[0], "w") as geno_fh: - # geno_fh.write(str(len(self.sample_list)) + "\n") - # geno_fh.write("2\n") - # geno_fh.write("IND") - # for sample in self.sample_list: - # geno_fh.write(" " + sample) - # geno_fh.write("\n") for marker in self.markers: geno_fh.write(marker['name']) geno_fh.write(", X, Y") geno_fh.write(", " + ", ".join(marker['genotypes'])) geno_fh.write("\n") - - #pheno_fh = open(self.output_files[1], 'w') + with open(self.output_files[1], "w") as pheno_fh: for sample in self.sample_list: pheno_fh.write("1\n") - + with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") else: snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") - - + def get_sample_list(self, row_contents): self.sample_list = [] if self.mb_exists: @@ -164,8 +123,6 @@ class ConvertGenoFile(object): def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): - #if self.input_file.endswith(".geno.gz"): - # print("row: ", row) self.latest_row_value = row # Take care of headers if not row.strip(): @@ -208,10 +165,8 @@ class ConvertGenoFile(object): convertob.convert() except EmptyConfigurations as why: print(" No config info? Continuing...") - #excepted = True continue except Exception as why: - print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, @@ -219,12 +174,6 @@ class ConvertGenoFile(object): print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - - #def process_snps_file(cls, snps_file, new_directory): - # output_file = os.path.join(new_directory, "mouse_families.json") - # print("%s -> %s" % (snps_file, output_file)) - # convertob = ConvertGenoFile(input_file, output_file) - if __name__=="__main__": Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/""" @@ -234,6 +183,4 @@ if __name__=="__main__": #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") #convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) - - #process_csv(Input_File, Output_File)
\ No newline at end of file + #ConvertGenoFiles(Geno_Directory)
\ No newline at end of file diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 79242661..2825c6ea 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -78,7 +78,6 @@ def parse_db_uri(db_uri): return db_conn_info - def get_species(): """Build species list""" Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") @@ -265,7 +264,7 @@ def build_datasets(species, group, type_name): def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" - parse_db_uri(SQL_URI) + parse_db_uri() species = get_species() groups = get_groups(species) @@ -304,6 +303,6 @@ def _test_it(): #print("build_datasets:", pf(datasets)) if __name__ == '__main__': - Conn = MySQLdb.Connect(**parse_db_uri(SQL_URI)) + Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() - main() + main()
\ No newline at end of file diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 04e94886..1dc6c46c 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -6,16 +6,6 @@ import gzip from base import webqtlConfig -def process_genofiles(geno_dir=webqtlConfig.GENODIR): - print("Yabba") - #sys.exit("Dabba") - os.chdir(geno_dir) - for geno_file in glob.glob("*"): - if geno_file.lower().endswith(('.geno', '.geno.gz')): - #group_name = genofilename.split('.')[0] - sample_list = get_samplelist(geno_file) - - def get_samplelist(file_type, geno_file): if file_type == "geno": return get_samplelist_from_geno(geno_file) diff --git a/wqflask/utility/AJAX_table.py b/wqflask/utility/AJAX_table.py deleted file mode 100644 index d70acfcd..00000000 --- a/wqflask/utility/AJAX_table.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -import cPickle -import os -import MySQLdb -import time -import pyXLWriter as xl - -from htmlgen import HTMLgen2 as HT - -from base import webqtlConfig -from THCell import THCell -from TDCell import TDCell -import webqtlUtil - - -class AJAX_table: - def __init__(self, fd): - file = fd.formdata.getfirst("file", "") - sort = fd.formdata.getfirst("sort", "") - order = fd.formdata.getfirst("order", "up") - cmd = fd.formdata.getfirst("cmd", "") - tableID = fd.formdata.getfirst("tableID", "") - addIndex = fd.formdata.getfirst("addIndex", "1") - hiddenColumnsString = fd.formdata.getfirst("hiddenColumns", "") - hiddenColumns = hiddenColumnsString.split(',') - - try: - fp = open(os.path.join(webqtlConfig.TMPDIR, file + '.obj'), 'rb') - tblobj = cPickle.load(fp) - fp.close() - - if cmd == 'addCorr': - dbId = int(fd.formdata.getfirst("db")) - dbFullName = fd.formdata.getfirst("dbname") - trait = fd.formdata.getfirst("trait") - form = fd.formdata.getfirst("form") - ids = fd.formdata.getfirst("ids") - vals = fd.formdata.getfirst("vals") - ids = eval(ids) - nnCorr = len(ids) - vals = eval(vals) - - workbook = xl.Writer('%s.xls' % (webqtlConfig.TMPDIR+file)) - worksheet = workbook.add_worksheet() - - logger.warning("Creating new MySQLdb cursor (this method is OBSOLETE!)") - - con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER,passwd=webqtlConfig.DB_PASSWD) - cursor = con.cursor() - - cursor.execute("Select name, ShortName from ProbeSetFreeze where Id = %s", dbId) - dbName, dbShortName = cursor.fetchone() - - tblobj['header'][0].append( - THCell(HT.TD(dbShortName, Class="fs11 ffl b1 cw cbrb"), - text="%s" % dbShortName, idx=tblobj['header'][0][-1].idx + 1), - ) - - headingStyle = workbook.add_format(align = 'center', bold = 1, border = 1, size=13, fg_color = 0x1E, color="white") - for i, item in enumerate(tblobj['header'][0]): - if (i > 0): - worksheet.write([8, i-1], item.text, headingStyle) - worksheet.set_column([i-1, i-1], 2*len(item.text)) - - for i, row in enumerate(tblobj['body']): - ProbeSetId = row[1].text - #XZ, 03/02/2009: Xiaodong changed Data to ProbeSetData - cursor.execute(""" - Select ProbeSetData.StrainId, ProbeSetData.Value - From ProbeSetData, ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %d AND - ProbeSetXRef.DataId = ProbeSetData.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSet.Name = '%s' - """ % (dbId, ProbeSetId)) - results = cursor.fetchall() - vdict = {} - for item in results: - vdict[item[0]] = item[1] - newvals = [] - for id in ids: - if vdict.has_key(id): - newvals.append(vdict[id]) - else: - newvals.append(None) - corr,nOverlap= webqtlUtil.calCorrelation(newvals,vals,nnCorr) - repr = '%0.4f' % corr - row.append( - TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlotThird('%s', '%s', '%s')" % (form, dbName, ProbeSetId), Class="fs11 fwn ffl"), " / ", nOverlap, Class="fs11 fwn ffl b1 c222", align="middle"),repr,abs(corr)) - ) - - last_row=0 - for j, item in enumerate(tblobj['body'][i]): - if (j > 0): - worksheet.write([9+i, j-1], item.text) - last_row = 9+i - last_row += 1 - - titleStyle = workbook.add_format(align = 'left', bold = 0, size=14, border = 1, border_color="gray") - ##Write title Info - # Modified by Hongqiang Li - worksheet.write([0, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle) - worksheet.write([1, 0], "Trait : %s" % trait, titleStyle) - worksheet.write([2, 0], "Database : %s" % dbFullName, titleStyle) - worksheet.write([3, 0], "Date : %s" % time.strftime("%B %d, %Y", time.gmtime()), titleStyle) - worksheet.write([4, 0], "Time : %s GMT" % time.strftime("%H:%M ", time.gmtime()), titleStyle) - worksheet.write([5, 0], "Status of data ownership: Possibly unpublished data; please see %s/statusandContact.html for details on sources, ownership, and usage of these data." % webqtlConfig.PORTADDR, titleStyle) - #Write footer info - worksheet.write([1 + last_row, 0], "Funding for The GeneNetwork: NIAAA (U01AA13499, U24AA13513), NIDA, NIMH, and NIAAA (P20-DA21131), NCI MMHCC (U01CA105417), and NCRR (U01NR 105417)", titleStyle) - worksheet.write([2 + last_row, 0], "PLEASE RETAIN DATA SOURCE INFORMATION WHENEVER POSSIBLE", titleStyle) - - cursor.close() - workbook.close() - - objfile = open(os.path.join(webqtlConfig.TMPDIR, file + '.obj'), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - else: - pass - - self.value = str(webqtlUtil.genTableObj(tblobj=tblobj, file=file, sortby=(sort, order), tableID = tableID, addIndex = addIndex, hiddenColumns = hiddenColumns)) - - except: - self.value = "<span class='fs16 fwb cr ffl'>The table is no longer available on this server</span>" - - def __str__(self): - return self.value - - def write(self): - return str(self) diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index d60e2bb2..529cd117 100644 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -36,11 +36,9 @@ from numarray import linear_algebra as la from numarray import ones, array, dot, swapaxes import reaper -# sys.path.append("..") Never in a running webserver -from basicStatistics import corestats -import svg import webqtlUtil +import corestats from base import webqtlConfig import utility.logger @@ -83,202 +81,6 @@ def frange(start, end=None, inc=1.0): L[i] = start + i * inc return L - -def gammln(xx): - cof=[76.18009173,-86.50532033,24.01409822,-1.231739516,0.120858003e-2,-0.536382e-5] - x=xx-1.0 - tmp=x+5.5 - tmp -=(x+0.5)*log(tmp) - ser=1.0 - for item in cof: - x+=1.0 - ser+=item/x - - return -tmp+log(2.50662827465*ser) - - -def gser(a,x): - gln=gammln(a) - ITMAX=100 - EPS=3.0e-7 - - if x<=0.0: - gamser=0.0 - return [gamser,gln] - else: - ap=a - sum=1.0/a - dele=sum - for i in range(1,ITMAX+1): - ap+=1.0 - dele*=x/ap - sum+=dele - if abs(dele)<abs(sum)*EPS: - gamser=sum*exp(-x+a*log(x)-gln) - return [gamser,gln] - return None - -def gcf(a,x): - ITMAX=100 - EPS=3.0e-7 - gold=0.0 - fac=1 - b1=1.0 - b0=0.0 - a0=1.0 - gln=gammln(a) - - a1=x - for n in range(1,ITMAX+1): - an=n+0.0 - ana=an-a - a0=(a1+a0*ana)*fac - b0=(b1+b0*ana)*fac - anf=an*fac - a1=x*a0+anf*a1 - b1=x*b0+anf*b1 - if (a1): - fac=1.0/a1 - g=b1*fac - if abs((g-gold)/g)<EPS: - gammcf=exp(-x+a*log(x)-gln)*g - return [gammcf,gln] - gold=g - return None - -def gammp(a,x): - if x<0.0 or a<=0.0: - return None - if x<(a+1.0): - a=gser(a,x)[0] - return a - else: - a=gcf(a,x)[0] - return 1.0-a -def U(n): - x=pow(0.5,1.0/n) - m=[1-x] - for i in range(2,n): - a=(i-0.3175)/(n+0.365) - m.append(a) - m.append(x) - return m - -def erf(x): - if x<0.0: - return -gammp(0.5,x*x) - else: - return gammp(0.5,x*x) - -def erfcc(x): - z=abs(x) - t=1.0/(1.0+0.5*z) - ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277))))))))) - if x>=0.0: - return ans - else: - return 2.0-ans - -def calMeanVar(data): - n=len(data) - if n<2: - return None - else: - sum=reduce(lambda x,y:x+y,data,0.0) - mean=sum/n - z=data[:] - for i in range(n): - z[i]=z[i]-mean - variance=reduce(lambda x,y:x+y*y,z,0.0) - variance /= n-1 - variance =sqrt(variance) - for i in range(n): - z[i]=z[i]/variance - return z - -def inverseCumul(p): - #Coefficients in rational approximations. - a = [-3.969683028665376e+01,2.209460984245205e+02,-2.759285104469687e+02,1.383577518672690e+02,-3.066479806614716e+01,2.506628277459239e+00] - - b = [-5.447609879822406e+01,1.615858368580409e+02,-1.556989798598866e+02,6.680131188771972e+01,-1.328068155288572e+01] - - c = [-7.784894002430293e-03,-3.223964580411365e-01,-2.400758277161838e+00,-2.549732539343734e+00,4.374664141464968e+00,2.938163982698783e+00] - - d = [7.784695709041462e-03,3.224671290700398e-01,2.445134137142996e+00,3.754408661907416e+00] - - #Define break-points. - - p_low = 0.02425 - p_high = 1 - p_low - - #Rational approximation for lower region. - - if p > 0 and p < p_low: - q = sqrt(-2*log(p)) - x = (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) / ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1) - - - #Rational approximation for central region. - - elif p>= p_low and p <= p_high: - q = p - 0.5 - r = q*q - x = (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /(((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1) - - #Rational approximation for upper region. - - elif p>p_high and p < 1: - q = sqrt(-2*log(1-p)) - x = -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1) - - else: - return None - - if p>0 and p < 1: - e = 0.5 * erfcc(-x/sqrt(2)) - p - u = e * sqrt(2*pi) * exp(x*x/2) - x = x - u/(1 + x*u/2) - return x - else: - return None - -def gmean(lst): - N = len(lst) - if N == 0: - return 0 - else: - return (reduce(lambda x,y: x+y, lst, 0.0))/N - -def gmedian(lst2): - lst = lst2[:] - N = len(lst) - if N == 0: - return 0 - else: - lst.sort() - if N % 2 == 0: - return (lst[N/2]+lst[(N-2)/2])/2.0 - else: - return lst[(N-1)/2] - -def gpercentile(lst2, np): - """Obsolete - use percentile in corestats instead""" - lst = lst2[:] - N = len(lst) - if N == 0 or np > 100 or np < 0: - return None - else: - lst.sort() - pNadd1 = (np/100.0)*N - k = int(pNadd1) - d = pNadd1 - k - if k == 0: - return lst[0] - elif k >= N-1: - return lst[N-1] - else: - return lst[k-1] + d*(lst[k] - lst[k-1]) - def find_outliers(vals): """Calculates the upper and lower bounds of a set of sample/case values @@ -315,165 +117,6 @@ def find_outliers(vals): logger.debug(pf(locals())) return upper_bound, lower_bound - -def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"): - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - iValues = [] - for item in data: - for item2 in item[1]: - try: - iValues.append(item2[1]) - except: - iValues.append(item2) - - #draw frame - max_Y = max(iValues) - min_Y = min(iValues) - scaleY = detScale(min_Y, max_Y) - Yll = scaleY[0] - Yur = scaleY[1] - nStep = scaleY[2] - stepY = (Yur - Yll)/nStep - stepYPixel = plotHeight/(nStep) - canvas.drawRect(plotWidth+xLeftOffset, plotHeight + yTopOffset, xLeftOffset, yTopOffset) - - ##draw Y Scale - YYY = Yll - YCoord = plotHeight + yTopOffset - scaleFont=pid.Font(ttf="cour",size=11,bold=1) - for i in range(nStep+1): - strY = cformat(d=YYY, rank=0) - YCoord = max(YCoord, yTopOffset) - canvas.drawLine(xLeftOffset,YCoord,xLeftOffset-5,YCoord) - canvas.drawString(strY, xLeftOffset -30,YCoord +5,font=scaleFont) - YYY += stepY - YCoord -= stepYPixel - - ##draw X Scale - stepX = plotWidth/len(data) - XCoord = xLeftOffset + 0.5*stepX - YCoord = plotHeight + yTopOffset - scaleFont = pid.Font(ttf="tahoma",size=12,bold=0) - labelFont = pid.Font(ttf="tahoma",size=13,bold=0) - for item in data: - itemname, itemvalue = item - canvas.drawLine(XCoord, YCoord,XCoord, YCoord+5, color=pid.black) - canvas.drawString(itemname, XCoord - canvas.stringWidth(itemname,font=labelFont)/2.0,\ - YCoord +20,font=labelFont) - - nValue = len(itemvalue) - catValue = [] - for item2 in itemvalue: - try: - tstrain, tvalue = item2 - except: - tvalue = item2 - if nValue <= 4: - canvas.drawCross(XCoord, plotHeight + yTopOffset - (tvalue-Yll)*plotHeight/(Yur - Yll), color=pid.red,size=5) - else: - catValue.append(tvalue) - if catValue != []: - catMean = gmean(catValue) - catMedian = gmedian(catValue) - lowHinge = gpercentile(catValue, 25) - upHinge = gpercentile(catValue, 75) - Hstep = 1.5*(upHinge - lowHinge) - - outlier = [] - extrem = [] - - upperAdj = None - for item in catValue: - if item >= upHinge + 2*Hstep: - extrem.append(item) - elif item >= upHinge + Hstep: - outlier.append(item) - elif item > upHinge and item < upHinge + Hstep: - if upperAdj == None or item > upperAdj: - upperAdj = item - else: - pass - lowerAdj = None - for item in catValue: - if item <= lowHinge - 2*Hstep: - extrem.append(item) - elif item <= lowHinge - Hstep: - outlier.append(item) - if item < lowHinge and item > lowHinge - Hstep: - if lowerAdj == None or item < lowerAdj: - lowerAdj = item - else: - pass - canvas.drawRect(XCoord-20, plotHeight + yTopOffset - (lowHinge-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (upHinge-Yll)*plotHeight/(Yur - Yll)) - canvas.drawLine(XCoord-20, plotHeight + yTopOffset - (catMedian-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (catMedian-Yll)*plotHeight/(Yur - Yll)) - if upperAdj != None: - canvas.drawLine(XCoord, plotHeight + yTopOffset - (upHinge-Yll)*plotHeight/(Yur - Yll), \ - XCoord, plotHeight + yTopOffset - (upperAdj-Yll)*plotHeight/(Yur - Yll)) - canvas.drawLine(XCoord-20, plotHeight + yTopOffset - (upperAdj-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (upperAdj-Yll)*plotHeight/(Yur - Yll)) - if lowerAdj != None: - canvas.drawLine(XCoord, plotHeight + yTopOffset - (lowHinge-Yll)*plotHeight/(Yur - Yll), \ - XCoord, plotHeight + yTopOffset - (lowerAdj-Yll)*plotHeight/(Yur - Yll)) - canvas.drawLine(XCoord-20, plotHeight + yTopOffset - (lowerAdj-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (lowerAdj-Yll)*plotHeight/(Yur - Yll)) - - outlierFont = pid.Font(ttf="cour",size=12,bold=0) - if outlier != []: - for item in outlier: - yc = plotHeight + yTopOffset - (item-Yll)*plotHeight/(Yur - Yll) - #canvas.drawEllipse(XCoord-3, yc-3, XCoord+3, yc+3) - canvas.drawString('o', XCoord-3, yc+5, font=outlierFont, color=pid.orange) - if extrem != []: - for item in extrem: - yc = plotHeight + yTopOffset - (item-Yll)*plotHeight/(Yur - Yll) - #canvas.drawEllipse(XCoord-3, yc-3, XCoord+3, yc+3) - canvas.drawString('*', XCoord-3, yc+6, font=outlierFont, color=pid.red) - - canvas.drawCross(XCoord, plotHeight + yTopOffset - (catMean-Yll)*plotHeight/(Yur - Yll), \ - color=pid.blue,size=3) - #print(catMean, catMedian, cat25per, cat75per) - pass - - XCoord += stepX - - labelFont=pid.Font(ttf="verdana",size=18,bold=0) - canvas.drawString(XLabel, xLeftOffset + (plotWidth -canvas.stringWidth(XLabel,font=labelFont))/2.0, \ - YCoord +40, font=labelFont) - canvas.drawString(YLabel,xLeftOffset-40, YCoord-(plotHeight -canvas.stringWidth(YLabel,font=labelFont))/2.0,\ - font=labelFont, angle =90) - -def plotSecurity(canvas, text="12345"): - if not text: - return - - plotWidth = canvas.size[0] - plotHeight = canvas.size[1] - if plotHeight<=0 or plotWidth<=0: - return - - bgColor = pid.Color(0.6+0.4*random.random(), 0.6+0.4*random.random(), 0.6+0.4*random.random()) - canvas.drawRect(0,0,plotWidth,plotHeight, edgeColor=bgColor, fillColor=bgColor) - - for i in range(30): - randomColor = pid.Color(0.6+0.4*random.random(), 0.6+0.4*random.random(), 0.6+0.4*random.random()) - scaleFont=pid.Font(ttf="cour",size=random.choice(range(20, 50))) - canvas.drawString(random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'), - int(random.random()*plotWidth), int(random.random()*plotHeight), font=scaleFont, - color=randomColor, angle=random.choice(range(-45, 50))) - - step = (plotWidth-20)/len(text) - startX = 20 - for item in text: - randomColor = pid.Color(0.6*random.random(),0.6*random.random(), 0.6*random.random()) - scaleFont=pid.Font(ttf="verdana",size=random.choice(range(50, 60)),bold=1) - canvas.drawString(item, startX, plotHeight/2-10, font=scaleFont, - color=randomColor, angle=random.choice(range(-45, 50))) - startX += step - # parameter: data is either object returned by reaper permutation function (called by MarkerRegressionPage.py) # or the first object returned by direct (pair-scan) permu function (called by DirectPlotPage.py) def plotBar(canvas, data, barColor=pid.blue, axesColor=pid.black, labelColor=pid.black, XLabel=None, YLabel=None, title=None, offset= (60, 20, 40, 40), zoom = 1): @@ -561,542 +204,6 @@ def plotBar(canvas, data, barColor=pid.blue, axesColor=pid.black, labelColor=pid canvas.drawString(title,xLeftOffset+(plotWidth-canvas.stringWidth(title,font=labelFont))/2.0, 20,font=labelFont,color=labelColor) -def plotBarText(canvas, data, label, variance=None, barColor=pid.blue, axesColor=pid.black, labelColor=pid.black, XLabel=None, YLabel=None, title=None, sLabel = None, offset= (80, 20, 40, 100), barSpace = 2, zoom = 1): - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - if plotHeight<=0 or plotWidth<=0: - return - - NNN = len(data) - if NNN < 2 or NNN != len(label): - return - if variance and len(variance)!=NNN: - variance = [] - - Y2 = data[:] - if variance: - for i in range(NNN): - if variance[i]: - Y2 += [data[i]-variance[i]] - - #Y axis - YLow, YTop, stepY = detScale(min(Y2), max(Y2)) - YScale = plotHeight/(YTop - YLow) - - if YLow < 0 and YTop > 0: - drawZero = 1 - else: - drawZero = 0 - - #X axis - X = range(NNN) - Xll= 0 - Xur= NNN-1 - - - if drawZero: - YZero = yTopOffset+plotHeight-YScale*(0-YLow) - canvas.drawLine(xLeftOffset, YZero, xLeftOffset+plotWidth, YZero) - else: - YZero = yTopOffset+plotHeight - #draw data - spaceWidth = barSpace - if spaceWidth < 1: - spaceWidth = 1 - barWidth = int((plotWidth - (NNN-1.0)*spaceWidth)/NNN) - - xc= xLeftOffset - scaleFont=pid.Font(ttf="verdana",size=11,bold=0) - for i in range(NNN): - yc = yTopOffset+plotHeight-(data[i]-YLow)*YScale - canvas.drawRect(xc,YZero,xc+barWidth-1, yc, edgeColor=barColor,fillColor=barColor) - if variance and variance[i]: - varlen = variance[i]*YScale - if yc-varlen < yTopOffset: - topYd = yTopOffset - else: - topYd = yc-varlen - canvas.drawLine(xc+barWidth/2-2,yc-varlen,xc+barWidth/2+2,yc-varlen,color=pid.red) - canvas.drawLine(xc+barWidth/2,yc+varlen,xc+barWidth/2,topYd,color=pid.red) - canvas.drawLine(xc+barWidth/2-2,yc+varlen,xc+barWidth/2+2,yc+varlen,color=pid.red) - strX = label[i] - canvas.drawString(strX,xc+barWidth/2.0+2,yTopOffset+plotHeight+2+canvas.stringWidth(strX,font=scaleFont),font=scaleFont,angle=90) - xc += barWidth + spaceWidth - - #draw drawing region - canvas.drawRect(xLeftOffset, yTopOffset, xLeftOffset+plotWidth, yTopOffset+plotHeight) - - #draw Y scale - scaleFont=pid.Font(ttf="cour",size=16,bold=1) - y=YLow - for i in range(stepY+1): - yc=yTopOffset+plotHeight-(y-YLow)*YScale - canvas.drawLine(xLeftOffset,yc,xLeftOffset-5,yc, color=axesColor) - strY = cformat(d=y, rank=0) - canvas.drawString(strY,xLeftOffset-canvas.stringWidth(strY,font=scaleFont)-6,yc+5,font=scaleFont) - y+= (YTop - YLow)/stepY - - #draw label - labelFont=pid.Font(ttf="verdana",size=17,bold=0) - if XLabel: - canvas.drawString(XLabel,xLeftOffset+(plotWidth-canvas.stringWidth(XLabel,font=labelFont))/2.0,yTopOffset+plotHeight+65,font=labelFont,color=labelColor) - - if YLabel: - canvas.drawString(YLabel,xLeftOffset-50, yTopOffset+plotHeight-(plotHeight-canvas.stringWidth(YLabel,font=labelFont))/2.0,font=labelFont,color=labelColor,angle=90) - - labelFont=pid.Font(ttf="verdana",size=18,bold=0) - if title: - canvas.drawString(title,xLeftOffset,yTopOffset-15,font=labelFont,color=labelColor) - - return - -#def plotXY(canvas, dataX, dataY, rank=0, dataLabel=[], plotColor = pid.black, axesColor=pid.black, labelColor=pid.black, lineSize="thin", lineColor=pid.grey, idFont="arial", idColor=pid.blue, idSize="14", symbolColor=pid.black, symbolType="circle", filled="yes", symbolSize="tiny", XLabel=None, YLabel=None, title=None, fitcurve=None, connectdot=1, displayR=None, loadingPlot = 0, offset= (80, 20, 40, 60), zoom = 1, specialCases=[], showLabel = 1, bufferSpace = 15): -# 'displayR : correlation scatter plot, loadings : loading plot' -# -# dataXRanked, dataYRanked = webqtlUtil.calRank(dataX, dataY, len(dataX)) -# -# #get ID font size -# idFontSize = int(idSize) -# -# #If filled is yes, set fill color -# if filled == "yes": -# fillColor = symbolColor -# else: -# fillColor = None -# -# if symbolSize == "large": -# sizeModifier = 7 -# fontModifier = 12 -# elif symbolSize == "medium": -# sizeModifier = 5 -# fontModifier = 8 -# elif symbolSize == "small": -# sizeModifier = 3 -# fontModifier = 3 -# else: -# sizeModifier = 1 -# fontModifier = -1 -# -# if rank == 0: # Pearson correlation -# bufferSpace = 0 -# dataXPrimary = dataX -# dataYPrimary = dataY -# dataXAlt = dataXRanked #Values used just for printing the other corr type to the graph image -# dataYAlt = dataYRanked #Values used just for printing the other corr type to the graph image -# else: # Spearman correlation: Switching Ranked and Unranked X and Y values -# dataXPrimary = dataXRanked -# dataYPrimary = dataYRanked -# dataXAlt = dataX #Values used just for printing the other corr type to the graph image -# dataYAlt = dataY #Values used just for printing the other corr type to the graph image -# -# xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset -# plotWidth = canvas.size[0] - xLeftOffset - xRightOffset -# plotHeight = canvas.size[1] - yTopOffset - yBottomOffset -# if plotHeight<=0 or plotWidth<=0: -# return -# if len(dataXPrimary) < 1 or len(dataXPrimary) != len(dataYPrimary) or (dataLabel and len(dataXPrimary) != len(dataLabel)): -# return -# -# max_X=max(dataXPrimary) -# min_X=min(dataXPrimary) -# max_Y=max(dataYPrimary) -# min_Y=min(dataYPrimary) -# -# #for some reason I forgot why I need to do this -# if loadingPlot: -# min_X = min(-0.1,min_X) -# max_X = max(0.1,max_X) -# min_Y = min(-0.1,min_Y) -# max_Y = max(0.1,max_Y) -# -# xLow, xTop, stepX=detScale(min_X,max_X) -# yLow, yTop, stepY=detScale(min_Y,max_Y) -# xScale = plotWidth/(xTop-xLow) -# yScale = plotHeight/(yTop-yLow) -# -# #draw drawing region -# canvas.drawRect(xLeftOffset-bufferSpace, yTopOffset, xLeftOffset+plotWidth, yTopOffset+plotHeight+bufferSpace) -# canvas.drawRect(xLeftOffset-bufferSpace+1, yTopOffset, xLeftOffset+plotWidth, yTopOffset+plotHeight+bufferSpace-1) -# -# #calculate data points -# data = map(lambda X, Y: (X, Y), dataXPrimary, dataYPrimary) -# xCoord = map(lambda X, Y: ((X-xLow)*xScale + xLeftOffset, yTopOffset+plotHeight-(Y-yLow)*yScale), dataXPrimary, dataYPrimary) -# -# labelFont=pid.Font(ttf=idFont,size=idFontSize,bold=0) -# -# if loadingPlot: -# xZero = -xLow*xScale+xLeftOffset -# yZero = yTopOffset+plotHeight+yLow*yScale -# for point in xCoord: -# canvas.drawLine(xZero,yZero,point[0],point[1],color=pid.red) -# else: -# if connectdot: -# canvas.drawPolygon(xCoord,edgeColor=plotColor,closed=0) -# else: -# pass -# -# symbolFont = pid.Font(ttf="fnt_bs", size=12+fontModifier,bold=0) -# -# for i, item in enumerate(xCoord): -# if dataLabel and dataLabel[i] in specialCases: -# canvas.drawRect(item[0]-3, item[1]-3, item[0]+3, item[1]+3, edgeColor=pid.green) -# #canvas.drawCross(item[0],item[1],color=pid.blue,size=5) -# else: -# if symbolType == "vertRect": -# canvas.drawRect(x1=item[0]-sizeModifier+2,y1=item[1]-sizeModifier-2, x2=item[0]+sizeModifier-1,y2=item[1]+sizeModifier+2, edgeColor=symbolColor, edgeWidth=1, fillColor=fillColor) -# elif (symbolType == "circle" and filled != "yes"): -# canvas.drawString(":", item[0]-canvas.stringWidth(":",font=symbolFont)/2+1,item[1]+2,color=symbolColor, font=symbolFont) -# elif (symbolType == "circle" and filled == "yes"): -# canvas.drawString("5", item[0]-canvas.stringWidth("5",font=symbolFont)/2+1,item[1]+2,color=symbolColor, font=symbolFont) -# elif symbolType == "horiRect": -# canvas.drawRect(x1=item[0]-sizeModifier-1,y1=item[1]-sizeModifier+3, x2=item[0]+sizeModifier+3,y2=item[1]+sizeModifier-2, edgeColor=symbolColor, edgeWidth=1, fillColor=fillColor) -# elif (symbolType == "square"): -# canvas.drawRect(x1=item[0]-sizeModifier+1,y1=item[1]-sizeModifier-4, x2=item[0]+sizeModifier+2,y2=item[1]+sizeModifier-3, edgeColor=symbolColor, edgeWidth=1, fillColor=fillColor) -# elif (symbolType == "diamond" and filled != "yes"): -# canvas.drawString(",", item[0]-canvas.stringWidth(",",font=symbolFont)/2+2, item[1]+6, font=symbolFont, color=symbolColor) -# elif (symbolType == "diamond" and filled == "yes"): -# canvas.drawString("D", item[0]-canvas.stringWidth("D",font=symbolFont)/2+2, item[1]+6, font=symbolFont, color=symbolColor) -# elif symbolType == "4-star": -# canvas.drawString("l", item[0]-canvas.stringWidth("l",font=symbolFont)/2+1, item[1]+3, font=symbolFont, color=symbolColor) -# elif symbolType == "3-star": -# canvas.drawString("k", item[0]-canvas.stringWidth("k",font=symbolFont)/2+1, item[1]+3, font=symbolFont, color=symbolColor) -# else: -# canvas.drawCross(item[0],item[1]-2,color=symbolColor, size=sizeModifier+2) -# -# if showLabel and dataLabel: -# if (symbolType == "vertRect" or symbolType == "diamond"): -# labelGap = 15 -# elif (symbolType == "4-star" or symbolType == "3-star"): -# labelGap = 12 -# else: -# labelGap = 11 -# canvas.drawString(dataLabel[i], item[0]- canvas.stringWidth(dataLabel[i], -# font=labelFont)/2 + 1, item[1]+(labelGap+sizeModifier+(idFontSize-12)), font=labelFont, color=idColor) -# -# #draw scale -# scaleFont=pid.Font(ttf="cour",size=16,bold=1) -# -# -# x=xLow -# for i in range(stepX+1): -# xc=xLeftOffset+(x-xLow)*xScale -# if ((x == 0) & (rank == 1)): -# pass -# else: -# canvas.drawLine(xc,yTopOffset+plotHeight + bufferSpace,xc,yTopOffset+plotHeight+5 + bufferSpace, color=axesColor) -# strX = cformat(d=x, rank=rank) -# if ((strX == "0") & (rank == 1)): -# pass -# else: -# canvas.drawString(strX,xc-canvas.stringWidth(strX,font=scaleFont)/2,yTopOffset+plotHeight+20 + bufferSpace,font=scaleFont) -# x+= (xTop - xLow)/stepX -# -# y=yLow -# for i in range(stepY+1): -# yc=yTopOffset+plotHeight-(y-yLow)*yScale -# if ((y == 0) & (rank == 1)): -# pass -# else: -# canvas.drawLine(xLeftOffset - bufferSpace,yc,xLeftOffset-5 - bufferSpace,yc, color=axesColor) -# strY = cformat(d=y, rank=rank) -# if ((strY == "0") & (rank == 1)): -# pass -# else: -# canvas.drawString(strY,xLeftOffset-canvas.stringWidth(strY,font=scaleFont)- 10 - bufferSpace,yc+4,font=scaleFont) -# y+= (yTop - yLow)/stepY -# -# #draw label -# -# labelFont=pid.Font(ttf="verdana",size=canvas.size[0]/45,bold=0) -# titleFont=pid.Font(ttf="verdana",size=canvas.size[0]/40,bold=0) -# -# if (rank == 1 and not title): -# canvas.drawString("Spearman Rank Correlation", xLeftOffset-canvas.size[0]*.025+(plotWidth-canvas.stringWidth("Spearman Rank Correlation",font=titleFont))/2.0, -# 25,font=titleFont,color=labelColor) -# elif (rank == 0 and not title): -# canvas.drawString("Pearson Correlation", xLeftOffset-canvas.size[0]*.025+(plotWidth-canvas.stringWidth("Pearson Correlation",font=titleFont))/2.0, -# 25,font=titleFont,color=labelColor) -# -# if XLabel: -# canvas.drawString(XLabel,xLeftOffset+(plotWidth-canvas.stringWidth(XLabel,font=labelFont))/2.0, -# yTopOffset+plotHeight+yBottomOffset-25,font=labelFont,color=labelColor) -# -# if YLabel: -# canvas.drawString(YLabel, xLeftOffset-65, yTopOffset+plotHeight- (plotHeight-canvas.stringWidth(YLabel,font=labelFont))/2.0, -# font=labelFont,color=labelColor,angle=90) -# -# labelFont=pid.Font(ttf="verdana",size=20,bold=0) -# if title: -# canvas.drawString(title,xLeftOffset+(plotWidth-canvas.stringWidth(title,font=labelFont))/2.0, -# 20,font=labelFont,color=labelColor) -# -# if fitcurve: -# import sys -# sys.argv = [ "mod_python" ] -# #from numarray import linear_algebra as la -# #from numarray import ones, array, dot, swapaxes -# fitYY = array(dataYPrimary) -# fitXX = array([ones(len(dataXPrimary)),dataXPrimary]) -# AA = dot(fitXX,swapaxes(fitXX,0,1)) -# BB = dot(fitXX,fitYY) -# bb = la.linear_least_squares(AA,BB)[0] -# -# xc1 = xLeftOffset -# yc1 = yTopOffset+plotHeight-(bb[0]+bb[1]*xLow-yLow)*yScale -# if yc1 > yTopOffset+plotHeight: -# yc1 = yTopOffset+plotHeight -# xc1 = (yLow-bb[0])/bb[1] -# xc1=(xc1-xLow)*xScale+xLeftOffset -# elif yc1 < yTopOffset: -# yc1 = yTopOffset -# xc1 = (yTop-bb[0])/bb[1] -# xc1=(xc1-xLow)*xScale+xLeftOffset -# else: -# pass -# -# xc2 = xLeftOffset + plotWidth -# yc2 = yTopOffset+plotHeight-(bb[0]+bb[1]*xTop-yLow)*yScale -# if yc2 > yTopOffset+plotHeight: -# yc2 = yTopOffset+plotHeight -# xc2 = (yLow-bb[0])/bb[1] -# xc2=(xc2-xLow)*xScale+xLeftOffset -# elif yc2 < yTopOffset: -# yc2 = yTopOffset -# xc2 = (yTop-bb[0])/bb[1] -# xc2=(xc2-xLow)*xScale+xLeftOffset -# else: -# pass -# -# canvas.drawLine(xc1 - bufferSpace,yc1 + bufferSpace,xc2,yc2,color=lineColor) -# if lineSize == "medium": -# canvas.drawLine(xc1 - bufferSpace,yc1 + bufferSpace+1,xc2,yc2+1,color=lineColor) -# if lineSize == "thick": -# canvas.drawLine(xc1 - bufferSpace,yc1 + bufferSpace+1,xc2,yc2+1,color=lineColor) -# canvas.drawLine(xc1 - bufferSpace,yc1 + bufferSpace-1,xc2,yc2-1,color=lineColor) -# -# -# if displayR: -# labelFont=pid.Font(ttf="trebuc",size=canvas.size[0]/60,bold=0) -# NNN = len(dataX) -# corr = webqtlUtil.calCorrelation(dataXPrimary,dataYPrimary,NNN)[0] -# -# if NNN < 3: -# corrPValue = 1.0 -# else: -# if abs(corr) >= 1.0: -# corrPValue = 0.0 -# else: -# ZValue = 0.5*log((1.0+corr)/(1.0-corr)) -# ZValue = ZValue*sqrt(NNN-3) -# corrPValue = 2.0*(1.0 - reaper.normp(abs(ZValue))) -# -# NStr = "N = %d" % NNN -# strLenN = canvas.stringWidth(NStr,font=labelFont) -# -# if rank == 1: -# if corrPValue < 0.0000000000000001: -# corrStr = "Rho = %1.3f P < 1.00 E-16" % (corr) -# else: -# corrStr = "Rho = %1.3f P = %3.2E" % (corr, corrPValue) -# else: -# if corrPValue < 0.0000000000000001: -# corrStr = "r = %1.3f P < 1.00 E-16" % (corr) -# else: -# corrStr = "r = %1.3f P = %3.2E" % (corr, corrPValue) -# strLen = canvas.stringWidth(corrStr,font=labelFont) -# -# canvas.drawString(NStr,xLeftOffset,yTopOffset-10,font=labelFont,color=labelColor) -# canvas.drawString(corrStr,xLeftOffset+plotWidth-strLen,yTopOffset-10,font=labelFont,color=labelColor) -# -# return xCoord - -def plotXYSVG(drawSpace, dataX, dataY, rank=0, dataLabel=[], plotColor = "black", axesColor="black", labelColor="black", symbolColor="red", XLabel=None, YLabel=None, title=None, fitcurve=None, connectdot=1, displayR=None, loadingPlot = 0, offset= (80, 20, 40, 60), zoom = 1, specialCases=[], showLabel = 1): - 'displayR : correlation scatter plot, loadings : loading plot' - - dataXRanked, dataYRanked = webqtlUtil.calRank(dataX, dataY, len(dataX)) - - # Switching Ranked and Unranked X and Y values if a Spearman Rank Correlation - if rank == 0: - dataXPrimary = dataX - dataYPrimary = dataY - dataXAlt = dataXRanked - dataYAlt = dataYRanked - - else: - dataXPrimary = dataXRanked - dataYPrimary = dataYRanked - dataXAlt = dataX - dataYAlt = dataY - - - - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = drawSpace.attributes['width'] - xLeftOffset - xRightOffset - plotHeight = drawSpace.attributes['height'] - yTopOffset - yBottomOffset - if plotHeight<=0 or plotWidth<=0: - return - if len(dataXPrimary) < 1 or len(dataXPrimary) != len(dataYPrimary) or (dataLabel and len(dataXPrimary) != len(dataLabel)): - return - - max_X=max(dataXPrimary) - min_X=min(dataXPrimary) - max_Y=max(dataYPrimary) - min_Y=min(dataYPrimary) - - #for some reason I forgot why I need to do this - if loadingPlot: - min_X = min(-0.1,min_X) - max_X = max(0.1,max_X) - min_Y = min(-0.1,min_Y) - max_Y = max(0.1,max_Y) - - xLow, xTop, stepX=detScale(min_X,max_X) - yLow, yTop, stepY=detScale(min_Y,max_Y) - xScale = plotWidth/(xTop-xLow) - yScale = plotHeight/(yTop-yLow) - - #draw drawing region - r = svg.rect(xLeftOffset, yTopOffset, plotWidth, plotHeight, 'none', axesColor, 1) - drawSpace.addElement(r) - - #calculate data points - data = map(lambda X, Y: (X, Y), dataXPrimary, dataYPrimary) - xCoord = map(lambda X, Y: ((X-xLow)*xScale + xLeftOffset, yTopOffset+plotHeight-(Y-yLow)*yScale), dataXPrimary, dataYPrimary) - labelFontF = "verdana" - labelFontS = 11 - - if loadingPlot: - xZero = -xLow*xScale+xLeftOffset - yZero = yTopOffset+plotHeight+yLow*yScale - for point in xCoord: - drawSpace.addElement(svg.line(xZero,yZero,point[0],point[1], "red", 1)) - else: - if connectdot: - pass - #drawSpace.drawPolygon(xCoord,edgeColor=plotColor,closed=0) - else: - pass - - for i, item in enumerate(xCoord): - if dataLabel and dataLabel[i] in specialCases: - drawSpace.addElement(svg.rect(item[0]-3, item[1]-3, 6, 6, "none", "green", 0.5)) - #drawSpace.drawCross(item[0],item[1],color=pid.blue,size=5) - else: - drawSpace.addElement(svg.line(item[0],item[1]+5,item[0],item[1]-5,symbolColor,1)) - drawSpace.addElement(svg.line(item[0]+5,item[1],item[0]-5,item[1],symbolColor,1)) - if showLabel and dataLabel: - pass - drawSpace.addElement(svg.text(item[0], item[1]+14, dataLabel[i], labelFontS, - labelFontF, text_anchor="middle", style="stroke:blue;stroke-width:0.5;")) - #canvas.drawString(, item[0]- canvas.stringWidth(dataLabel[i], - # font=labelFont)/2, item[1]+14, font=labelFont, color=pid.blue) - - #draw scale - #scaleFont=pid.Font(ttf="cour",size=14,bold=1) - x=xLow - for i in range(stepX+1): - xc=xLeftOffset+(x-xLow)*xScale - drawSpace.addElement(svg.line(xc,yTopOffset+plotHeight,xc,yTopOffset+plotHeight+5, axesColor, 1)) - strX = cformat(d=x, rank=rank) - drawSpace.addElement(svg.text(xc,yTopOffset+plotHeight+20,strX,13, "courier", text_anchor="middle")) - x+= (xTop - xLow)/stepX - - y=yLow - for i in range(stepY+1): - yc=yTopOffset+plotHeight-(y-yLow)*yScale - drawSpace.addElement(svg.line(xLeftOffset,yc,xLeftOffset-5,yc, axesColor, 1)) - strY = cformat(d=y, rank=rank) - drawSpace.addElement(svg.text(xLeftOffset-10,yc+5,strY,13, "courier", text_anchor="end")) - y+= (yTop - yLow)/stepY - - #draw label - labelFontF = "verdana" - labelFontS = 17 - if XLabel: - drawSpace.addElement(svg.text(xLeftOffset+plotWidth/2.0, - yTopOffset+plotHeight+yBottomOffset-10,XLabel, - labelFontS, labelFontF, text_anchor="middle")) - - if YLabel: - drawSpace.addElement(svg.text(xLeftOffset-50, - yTopOffset+plotHeight/2,YLabel, - labelFontS, labelFontF, text_anchor="middle", style="writing-mode:tb-rl", transform="rotate(270 %d %d)" % (xLeftOffset-50, yTopOffset+plotHeight/2))) - #drawSpace.drawString(YLabel, xLeftOffset-50, yTopOffset+plotHeight- (plotHeight-drawSpace.stringWidth(YLabel,font=labelFont))/2.0, - # font=labelFont,color=labelColor,angle=90) - - - if fitcurve: - sys.argv = [ "mod_python" ] - #from numarray import linear_algebra as la - #from numarray import ones, array, dot, swapaxes - fitYY = array(dataYPrimary) - fitXX = array([ones(len(dataXPrimary)),dataXPrimary]) - AA = dot(fitXX,swapaxes(fitXX,0,1)) - BB = dot(fitXX,fitYY) - bb = la.linear_least_squares(AA,BB)[0] - - xc1 = xLeftOffset - yc1 = yTopOffset+plotHeight-(bb[0]+bb[1]*xLow-yLow)*yScale - if yc1 > yTopOffset+plotHeight: - yc1 = yTopOffset+plotHeight - xc1 = (yLow-bb[0])/bb[1] - xc1=(xc1-xLow)*xScale+xLeftOffset - elif yc1 < yTopOffset: - yc1 = yTopOffset - xc1 = (yTop-bb[0])/bb[1] - xc1=(xc1-xLow)*xScale+xLeftOffset - else: - pass - - xc2 = xLeftOffset + plotWidth - yc2 = yTopOffset+plotHeight-(bb[0]+bb[1]*xTop-yLow)*yScale - if yc2 > yTopOffset+plotHeight: - yc2 = yTopOffset+plotHeight - xc2 = (yLow-bb[0])/bb[1] - xc2=(xc2-xLow)*xScale+xLeftOffset - elif yc2 < yTopOffset: - yc2 = yTopOffset - xc2 = (yTop-bb[0])/bb[1] - xc2=(xc2-xLow)*xScale+xLeftOffset - else: - pass - - drawSpace.addElement(svg.line(xc1,yc1,xc2,yc2,"green", 1)) - - if displayR: - labelFontF = "trebuc" - labelFontS = 14 - NNN = len(dataX) - - corr = webqtlUtil.calCorrelation(dataXPrimary,dataYPrimary,NNN)[0] - - if NNN < 3: - corrPValue = 1.0 - else: - if abs(corr) >= 1.0: - corrPValue = 0.0 - else: - ZValue = 0.5*log((1.0+corr)/(1.0-corr)) - ZValue = ZValue*sqrt(NNN-3) - corrPValue = 2.0*(1.0 - reaper.normp(abs(ZValue))) - - NStr = "N of Cases=%d" % NNN - - if rank == 1: - corrStr = "Spearman's r=%1.3f P=%3.2E" % (corr, corrPValue) - else: - corrStr = "Pearson's r=%1.3f P=%3.2E" % (corr, corrPValue) - - drawSpace.addElement(svg.text(xLeftOffset,yTopOffset-10,NStr, - labelFontS, labelFontF, text_anchor="start")) - drawSpace.addElement(svg.text(xLeftOffset+plotWidth,yTopOffset-25,corrStr, - labelFontS, labelFontF, text_anchor="end")) - """ - """ - return - - # This function determines the scale of the plot def detScaleOld(min,max): if min>=max: @@ -1114,7 +221,7 @@ def detScaleOld(min,max): high=c*ceil(max/c) return [low,high,round((high-low)/c)] -def detScale(min=0,max=0,bufferSpace=3): +def detScale(min=0,max=0): if min>=max: return None @@ -1151,57 +258,9 @@ def detScale(min=0,max=0,bufferSpace=3): return [low,high,n] - - -def colorSpectrumOld(n): - if n == 1: - return [pid.Color(1,0,0)] - elif n == 2: - return [pid.Color(1,0,0),pid.Color(0,0,1)] - elif n == 3: - return [pid.Color(1,0,0),pid.Color(0,1,0),pid.Color(0,0,1)] - else: - step = 2.0/(n-1) - red = 1.0 - green = 0.0 - blue = 0.0 - colors = [pid.Color(red,green,blue)] - i = 1 - greenpeak = 0 - while i < n: - if red >= step: - red -= step - green += step - if green >= 1.0: - greenpeak = 1 - blue += green -1.0 - green = 1.0 - else: - red = 0.0 - if greenpeak: - green -= step - blue += step - else: - green += step - if green >= 1.0: - greenpeak = 1 - blue += green -1.0 - green = 2.0 -green - elif green < 0.0: - green = 0.0 - else: - pass - colors.append(pid.Color(red,green,blue)) - i += 1 - return colors - - - - def bluefunc(x): return 1.0 / (1.0 + exp(-10*(x-0.6))) - def redfunc(x): return 1.0 / (1.0 + exp(10*(x-0.5))) @@ -1230,52 +289,10 @@ def colorSpectrum(n=100): out2.append(out[-1]) return out2 - -def colorSpectrumSVG(n=100): - multiple = 10 - if n == 1: - return ["rgb(255,0,0)"] - elif n == 2: - return ["rgb(255,0,0)","rgb(0,0,255)"] - elif n == 3: - return ["rgb(255,0,0)","rgb(0,255,0)","rgb(0,0,255)"] - N = n*multiple - out = [None]*N; - for i in range(N): - x = float(i)/N - out[i] = "rgb(%d, %d, %d)" % (redfunc(x)*255, greenfunc(x)*255, bluefunc(x)*255); - out2 = [out[0]] - step = N/float(n-1) - j = 0 - for i in range(n-2): - j += step - out2.append(out[int(j)]) - out2.append(out[-1]) - return out2 - - -def BWSpectrum(n=100): - multiple = 10 - if n == 1: - return [pid.Color(0,0,0)] - elif n == 2: - return [pid.Color(0,0,0),pid.Color(1,1,1)] - elif n == 3: - return [pid.Color(0,0,0),pid.Color(0.5,0.5,0.5),pid.Color(1,1,1)] - - step = 1.0/n - x = 0.0 - out = [] - for i in range(n): - out.append(pid.Color(x,x,x)); - x += step - return out - - def _test(): import doctest doctest.testmod() if __name__=="__main__": - _test() + _test()
\ No newline at end of file diff --git a/wqflask/utility/after.py b/wqflask/utility/after.py index a3bb85e9..b628a0a4 100644 --- a/wqflask/utility/after.py +++ b/wqflask/utility/after.py @@ -13,10 +13,4 @@ def after_this_request(f): if not hasattr(g, 'after_request_callbacks'): g.after_request_callbacks = [] g.after_request_callbacks.append(f) - return f - -@app.after_request -def call_after_request_callbacks(response): - for callback in getattr(g, 'after_request_callbacks', ()): - callback(response) - return response + return f
\ No newline at end of file diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py index 8c97370d..8f1c916b 100644 --- a/wqflask/utility/benchmark.py +++ b/wqflask/utility/benchmark.py @@ -11,7 +11,7 @@ logger = getLogger(__name__ ) class Bench(object): entries = collections.OrderedDict() - def __init__(self, name=None, write_output=True): + def __init__(self, name=None, write_output=LOG_BENCH): self.name = name self.write_output = write_output diff --git a/wqflask/basicStatistics/corestats.py b/wqflask/utility/corestats.py index eba84c52..67ca3ad3 100644 --- a/wqflask/basicStatistics/corestats.py +++ b/wqflask/utility/corestats.py @@ -13,11 +13,9 @@ # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. - - import sys - +#ZS: Should switch to using some third party library for this; maybe scipy has an equivalent class Stats: def __init__(self, sequence): @@ -25,47 +23,33 @@ class Stats: # convert all items to floats for numerical processing self.sequence = [float(item) for item in sequence] - def sum(self): if len(self.sequence) < 1: return None else: return sum(self.sequence) - def count(self): return len(self.sequence) - def min(self): if len(self.sequence) < 1: return None else: return min(self.sequence) - def max(self): if len(self.sequence) < 1: return None else: return max(self.sequence) - def avg(self): if len(self.sequence) < 1: return None else: return sum(self.sequence) / len(self.sequence) - - def median(self): - if len(self.sequence) < 1: - return None - else: - self.sequence.sort() - return self.sequence[len(self.sequence) // 2] - - def stdev(self): if len(self.sequence) < 1: return None @@ -75,7 +59,6 @@ class Stats: stdev = (sdsq / (len(self.sequence) - 1)) ** .5 return stdev - def percentile(self, percentile): if len(self.sequence) < 1: value = None @@ -88,9 +71,6 @@ class Stats: value = self.sequence[element_idx] return value - - - # Sample script using this class: # ------------------------------------------- # #!/usr/bin/env python @@ -100,4 +80,4 @@ class Stats: # stats = corestats.Stats(sequence) # print stats.avg() # print stats.percentile(90) -# ------------------------------------------- +# -------------------------------------------
\ No newline at end of file diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py index 7149e560..af306731 100644 --- a/wqflask/utility/genofile_parser.py +++ b/wqflask/utility/genofile_parser.py @@ -72,7 +72,7 @@ class ConvertGenoFile(object): yield row def process_csv(self): - for row_count, row in enumerate(self.process_rows()): + for row in self.process_rows(): row_items = row.split("\t") this_marker = Marker() diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py index 128706df..510b1041 100644 --- a/wqflask/utility/logger.py +++ b/wqflask/utility/logger.py @@ -33,7 +33,7 @@ from pprint import pformat as pf from inspect import stack import datetime -from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL, LOG_FORMAT +from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL class GNLogger: """A logger class with some additional functionality, such as diff --git a/wqflask/utility/webqtlUtil.py b/wqflask/utility/webqtlUtil.py index 4fc978f5..83fa90b7 100644 --- a/wqflask/utility/webqtlUtil.py +++ b/wqflask/utility/webqtlUtil.py @@ -34,9 +34,6 @@ from htmlgen import HTMLgen2 as HT from base import webqtlConfig - - - # NL, 07/27/2010. moved from webqtlForm.py #Dict of Parents and F1 information, In the order of [F1, Mat, Pat] ParInfo ={ @@ -64,173 +61,10 @@ ParInfo ={ 'SXM':['SMF1', 'MSF1', 'Steptoe','Morex'] } - -# NL, 07/27/2010. moved from template.py -IMGSTEP1 = HT.Image('/images/step1.gif', alt='STEP 1',border=0) #XZ, Only be used in inputPage.py -IMGSTEP2 = HT.Image('/images/step2.gif', alt='STEP 2',border=0) #XZ, Only be used in inputPage.py -IMGSTEP3 = HT.Image('/images/step3.gif', alt='STEP 3',border=0) #XZ, Only be used in inputPage.py -IMGNEXT = HT.Image('/images/arrowdown.gif', alt='NEXT',border=0) #XZ, Only be used in inputPage.py - -IMGASC = HT.Image("/images/sortup.gif", border=0) -IMGASCON = HT.Image("/images/sortupon.gif", border=0) -IMGDESC = HT.Image("/images/sortdown.gif", border=0) -IMGDESCON = HT.Image("/images/sortdownon.gif", border=0) - -""" -IMGASC = HT.Image("/images/sortup_icon.gif", border=0) -IMGASCON = HT.Image("/images/sortupon.gif", border=0) -IMGDESC = HT.Image("/images/sortdown_icon.gif", border=0) -IMGDESCON = HT.Image("/images/sortdownon.gif", border=0) -IMG_UNSORTED = HT.Image("/images/unsorted_icon.gif", border=0) -""" - -PROGRESSBAR = HT.Image('/images/waitAnima2.gif', alt='checkblue',align="middle",border=0) - ######################################### # Accessory Functions ######################################### -def inverseCumul(p): - #Coefficients in rational approximations. - a = [-3.969683028665376e+01,2.209460984245205e+02,-2.759285104469687e+02,1.383577518672690e+02,-3.066479806614716e+01,2.506628277459239e+00] - - b = [-5.447609879822406e+01,1.615858368580409e+02,-1.556989798598866e+02,6.680131188771972e+01,-1.328068155288572e+01] - - c = [-7.784894002430293e-03,-3.223964580411365e-01,-2.400758277161838e+00,-2.549732539343734e+00,4.374664141464968e+00,2.938163982698783e+00] - - d = [7.784695709041462e-03,3.224671290700398e-01,2.445134137142996e+00,3.754408661907416e+00] - - #Define break-points. - - p_low = 0.02425 - p_high = 1 - p_low - - #Rational approximation for lower region. - - if p > 0 and p < p_low: - q = sqrt(-2*log(p)) - x = (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) / ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1) - - - #Rational approximation for central region. - - elif p>= p_low and p <= p_high: - q = p - 0.5 - r = q*q - x = (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /(((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1) - - #Rational approximation for upper region. - - elif p>p_high and p < 1: - q = sqrt(-2*log(1-p)) - x = -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1) - - else: - return None - - if p>0 and p < 1: - e = 0.5 * erfcc(-x/sqrt(2)) - p - u = e * sqrt(2*pi) * exp(x*x/2) - x = x - u/(1 + x*u/2) - return x - else: - return None - -def erfcc(x): - z=abs(x) - t=1.0/(1.0+0.5*z) - ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277))))))))) - if x>=0.0: - return ans - else: - return 2.0-ans - -def U(n): - x=pow(0.5,1.0/n) - m=[1-x] - for i in range(2,n): - a=(i-0.3175)/(n+0.365) - m.append(a) - m.append(x) - return m - -def decodeEscape(str): - a = str - pattern = re.compile('(%[0-9A-Fa-f][0-9A-Fa-f])') - match = pattern.findall(a) - matched = [] - for item in match: - if item not in matched: - a = a.replace(item, '%c' % eval("0x"+item[-2:])) - matched.append(item) - return a - -def exportData(hddn, tdata, NP = None): - for key in tdata.keys(): - _val, _var, _N = tdata[key].val, tdata[key].var, tdata[key].N - if _val != None: - hddn[key] = _val - if _var != None: - hddn['V'+key] = _var - if NP and _N != None: - hddn['N'+key] = _N - -def genShortStrainName(RISet='', input_strainName=''): - #aliasStrainDict = {'C57BL/6J':'B6','DBA/2J':'D2'} - strainName = input_strainName - if RISet != 'AXBXA': - if RISet == 'BXD300': - this_RISet = 'BXD' - elif RISet == 'BDF2-2005': - this_RISet = 'CASE05_' - else: - this_RISet = RISet - strainName = string.replace(strainName,this_RISet,'') - strainName = string.replace(strainName,'CASE','') - try: - strainName = "%02d" % int(strainName) - except: - pass - else: - strainName = string.replace(strainName,'AXB','A') - strainName = string.replace(strainName,'BXA','B') - try: - strainName = strainName[0] + "%02d" % int(strainName[1:]) - except: - pass - return strainName - -def toInt(in_str): - "Converts an arbitrary string to an unsigned integer" - start = -1 - end = -1 - for i, char in enumerate(in_str): - if char >= '0' and char <= '9': - if start < 0: - start = i - end = i+1 - else: - if start >= 0: - break - if start < end: - return int(in_str[start:end]) - else: - return -1 - -def transpose(m): - 'transpose a matrix' - n = len(m) - return [[m[j][i] for i in range(len(m[0])) for j in range(n)][k*n:k*n+n] for k in range(len(m[0]))] - -def asymTranspose(m): - 'transpose a matrix' - t = max(map(len, m)) - n = len(m) - m2 = [["-"]]*n - for i in range(n): - m2[i] = m[i] + [""]*(t- len(m[i])) - return [[m2[j][i] for i in range(len(m2[0])) for j in range(n)][k*n:k*n+n] for k in range(len(m2[0]))] - def genRandStr(prefix = "", length=8, chars=string.letters+string.digits): from random import choice _str = prefix[:] @@ -238,84 +72,6 @@ def genRandStr(prefix = "", length=8, chars=string.letters+string.digits): _str += choice(chars) return _str -def generate_session(): - import sha - return sha.new(str(time.time())).hexdigest() - -def cvt2Dict(x): - tmp = {} - for key in x.keys(): - tmp[key] = x[key] - return tmp - -def dump_session(session_obj, filename): - "It seems mod python can only cPickle most basic data type" - import cPickle - session_file = open(filename, 'wb') - #try: - # pass - #except: - # pass - cPickle.dump(session_obj, session_file) - session_file.close() - -def StringAsFloat(str): - 'Converts string to float but catches any exception and returns None' - try: - return float(str) - except: - return None - -def IntAsFloat(str): - 'Converts string to Int but catches any exception and returns None' - try: - return int(str) - except: - return None - -def FloatAsFloat(flt): - 'Converts float to string but catches any exception and returns None' - try: - return float("%2.3f" % flt) - except: - return None - -def RemoveZero(flt): - 'Converts string to float but catches any exception and returns None' - try: - if abs(flt) < 1e-6: - return None - else: - return flt - except: - return None - - -def SciFloat(d): - 'Converts string to float but catches any exception and returns None' - - try: - if abs(d) <= 1.0e-4: - return "%1.2e" % d - else: - return "%1.5f" % d - except: - return None - -###To be removed -def FloatList2String(lst): - 'Converts float list to string but catches any exception and returns None' - tt='' - try: - for item in lst: - if item == None: - tt += 'X ' - else: - tt += '%f ' % item - return tt - except: - return "" - def ListNotNull(lst): '''Obsolete - Use built in function any (or all or whatever) @@ -328,427 +84,6 @@ def ListNotNull(lst): return 1 return None -###To be removed -def FileDataProcess(str): - 'Remove the description text from the input file if theres any' - i=0 - while i<len(str): - if str[i]<'\x7f' and str[i]>'\x20': - break - else: - i+=1 - str=str[i:] - str=string.join(string.split(str,'\000'),'') - i=string.find(str,"*****") - if i>-1: - return str[i+5:] - else: - return str - -def rank(a,lst,offset=0): - """Calculate the integer rank of a number in an array, can be used to calculate p-value""" - n = len(lst) - if n == 2: - if a <lst[0]: - return offset - elif a > lst[1]: - return offset + 2 - else: - return offset +1 - elif n == 1: - if a <lst[0]: - return offset - else: - return offset +1 - elif n== 0: - return offset - else: - mid = n/2 - if a < lst[mid]: - return rank(a,lst[:mid-1],offset) - else: - return rank(a,lst[mid:],offset+mid) - -def cmpScanResult(A,B): - try: - if A.LRS > B.LRS: - return 1 - elif A.LRS == B.LRS: - return 0 - else: - return -1 - except: - return 0 - - -def cmpScanResult2(A,B): - try: - if A.LRS < B.LRS: - return 1 - elif A.LRS == B.LRS: - return 0 - else: - return -1 - except: - return 0 - -def cmpOrder(A,B): - try: - if A[1] < B[1]: - return -1 - elif A[1] == B[1]: - return 0 - else: - return 1 - except: - return 0 - -def cmpOrder2(A,B): - try: - if A[-1] < B[-1]: - return -1 - elif A[-1] == B[-1]: - return 0 - else: - return 1 - except: - return 0 - - - - -def calRank(xVals, yVals, N): ### Zach Sloan, February 4 2010 - """ - Returns a ranked set of X and Y values. These are used when generating - a Spearman scatterplot. Bear in mind that this sets values equal to each - other as the same rank. - """ - XX = [] - YY = [] - X = [0]*len(xVals) - Y = [0]*len(yVals) - j = 0 - - for i in range(len(xVals)): - - if xVals[i] != None and yVals[i] != None: - XX.append((j, xVals[i])) - YY.append((j, yVals[i])) - j = j + 1 - - NN = len(XX) - - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - - j = 1 - rank = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - return (X,Y) - -def calCorrelationRank(xVals,yVals,N): - """ - Calculated Spearman Ranked Correlation. The algorithm works - by setting all tied ranks to the average of those ranks (for - example, if ranks 5-10 all have the same value, each will be set - to rank 7.5). - """ - - XX = [] - YY = [] - j = 0 - - for i in range(len(xVals)): - if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"): - XX.append((j,xVals[i])) - YY.append((j,yVals[i])) - j = j+1 - - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - -def calCorrelationRankText(dbdata,userdata,N): ### dcrowell = David Crowell, July 2008 - """Calculates correlation ranks with data formatted from the text file. - dbdata, userdata are lists of strings. N is an int. Returns a float. - Used by correlationPage""" - XX = [] - YY = [] - j = 0 - for i in range(N): - if (dbdata[i]!= None and userdata[i]!=None) and (dbdata[i]!= 'None' and userdata[i]!='None'): - XX.append((j,float(dbdata[i]))) - YY.append((j,float(userdata[i]))) - j += 1 - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - - -def calCorrelation(dbdata,userdata,N): - X = [] - Y = [] - for i in range(N): - if dbdata[i]!= None and userdata[i]!= None: - X.append(dbdata[i]) - Y.append(userdata[i]) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = reduce(lambda x,y:x+y,X,0.0) - sy = reduce(lambda x,y:x+y,Y,0.0) - meanx = sx/NN - meany = sy/NN - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - xyd += (X[i] - meanx)*(Y[i]-meany) - sxd += (X[i] - meanx)*(X[i] - meanx) - syd += (Y[i] - meany)*(Y[i] - meany) - try: - corr = xyd/(sqrt(sxd)*sqrt(syd)) - except: - corr = 0 - return (corr,NN) - -def calCorrelationText(dbdata,userdata,N): ### dcrowell July 2008 - """Calculates correlation coefficients with values formatted from text files. dbdata, userdata are lists of strings. N is an int. Returns a float - Used by correlationPage""" - X = [] - Y = [] - for i in range(N): - #if (dbdata[i]!= None and userdata[i]!= None) and (dbdata[i]!= 'None' and userdata[i]!= 'None'): - # X.append(float(dbdata[i])) - # Y.append(float(userdata[i])) - if dbdata[i] == None or dbdata[i] == 'None' or userdata[i] == None or userdata[i] == 'None': - continue - else: - X.append(float(dbdata[i])) - Y.append(float(userdata[i])) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = sum(X) - sy = sum(Y) - meanx = sx/float(NN) - meany = sy/float(NN) - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - x1 = X[i]-meanx - y1 = Y[i]-meany - xyd += x1*y1 - sxd += x1**2 - syd += y1**2 - try: - corr = xyd/(sqrt(sxd)*sqrt(syd)) - except: - corr = 0 - return (corr,NN) - - def readLineCSV(line): ### dcrowell July 2008 """Parses a CSV string of text and returns a list containing each element as a string. Used by correlationPage""" @@ -757,45 +92,6 @@ def readLineCSV(line): ### dcrowell July 2008 returnList[0]=returnList[0][1:] return returnList - -def cmpCorr(A,B): - try: - if abs(A[1]) < abs(B[1]): - return 1 - elif abs(A[1]) == abs(B[1]): - return 0 - else: - return -1 - except: - return 0 - -def cmpLitCorr(A,B): - try: - if abs(A[3]) < abs(B[3]): return 1 - elif abs(A[3]) == abs(B[3]): - if abs(A[1]) < abs(B[1]): return 1 - elif abs(A[1]) == abs(B[1]): return 0 - else: return -1 - else: return -1 - except: - return 0 - -def cmpPValue(A,B): - try: - if A.corrPValue < B.corrPValue: - return -1 - elif A.corrPValue == B.corrPValue: - if abs(A.corr) > abs(B.corr): - return -1 - elif abs(A.corr) < abs(B.corr): - return 1 - else: - return 0 - else: - return 1 - except: - return 0 - def cmpEigenValue(A,B): try: if A[0] > B[0]: @@ -807,80 +103,6 @@ def cmpEigenValue(A,B): except: return 0 - -def cmpLRSFull(A,B): - try: - if A[0] < B[0]: - return -1 - elif A[0] == B[0]: - return 0 - else: - return 1 - except: - return 0 - -def cmpLRSInteract(A,B): - try: - if A[1] < B[1]: - return -1 - elif A[1] == B[1]: - return 0 - else: - return 1 - except: - return 0 - - -def cmpPos(A,B): - try: - try: - AChr = int(A.chr) - except: - AChr = 20 - try: - BChr = int(B.chr) - except: - BChr = 20 - if AChr > BChr: - return 1 - elif AChr == BChr: - if A.mb > B.mb: - return 1 - if A.mb == B.mb: - return 0 - else: - return -1 - else: - return -1 - except: - return 0 - -def cmpGenoPos(A,B): - try: - A1 = A.chr - B1 = B.chr - try: - A1 = int(A1) - except: - A1 = 25 - try: - B1 = int(B1) - except: - B1 = 25 - if A1 > B1: - return 1 - elif A1 == B1: - if A.mb > B.mb: - return 1 - if A.mb == B.mb: - return 0 - else: - return -1 - else: - return -1 - except: - return 0 - def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users): access_to_confidential_phenotype_trait = 0 if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: @@ -889,142 +111,4 @@ def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users) AuthorizedUsersList=map(string.strip, string.split(authorized_users, ',')) if AuthorizedUsersList.__contains__(userName): access_to_confidential_phenotype_trait = 1 - return access_to_confidential_phenotype_trait - - -class VisualizeException(Exception): - def __init__(self, message): - self.message = message - def __str__(self): - return self.message - -# safeConvert : (string -> A) -> A -> A -# to convert a string to type A, using the supplied default value -# if the given conversion function doesn't work -def safeConvert(f, value, default): - try: - return f(value) - except: - return default - -# safeFloat : string -> float -> float -# to convert a string to a float safely -def safeFloat(value, default): - return safeConvert(float, value, default) - -# safeInt: string -> int -> int -# to convert a string to an int safely -def safeInt(value, default): - return safeConvert(int, value, default) - -# safeString : string -> (arrayof string) -> string -> string -# if a string is not in a list of strings to pick a default value -# for that string -def safeString(value, validChoices, default): - if value in validChoices: - return value - else: - return default - -# yesNoToInt: string -> int -# map "yes" -> 1 and "no" -> 0 -def yesNoToInt(value): - if value == "yes": - return 1 - elif value == "no": - return 0 - else: - return None - -# IntToYesNo: int -> string -# map 1 -> "yes" and 0 -> "no" -def intToYesNo(value): - if value == 1: - return "yes" - elif value == 0: - return "no" - else: - return None - -def formatField(name): - name = name.replace("_", " ") - name = name.title() - #name = name.replace("Mb Mm6", "Mb"); - return name.replace("Id", "ID") - -#XZ, 03/27/2009: This function is very specific. -#It is used by AJAX_table.py, correlationPage.py and dataPage.py - - -def genTableObj(tblobj=None, file="", sortby = ("", ""), tableID = "sortable", addIndex = "1", hiddenColumns=[]): - header = tblobj['header'] - body = tblobj['body'] - field, order = sortby - - #ZAS 9/12/2011 - The hiddenColumns array needs to be converted into a string so they can be placed into the javascript of each up/down button - hiddenColumnsString = ",".join(hiddenColumns) - - tbl = HT.TableLite(Class="collap b2", cellspacing=1, cellpadding=5) - - hiddenColumnIdx = [] #indices of columns to hide - idx = -1 - last_idx = 0 #ZS: This is the index of the last item in the regular table header (without any extra parameters). It is used to determine the index of each extra parameter. - for row in header: - hr = HT.TR() - for i, item in enumerate(row): - if (item.text == '') or (item.text not in hiddenColumns): - if item.sort and item.text: - down = HT.Href("javascript:xmlhttpPost('%smain.py?FormID=AJAX_table', '%s', 'sort=%s&order=down&file=%s&tableID=%s&addIndex=%s&hiddenColumns=%s')" % (webqtlConfig.CGIDIR, tableID, item.text, file, tableID, addIndex, hiddenColumnsString),IMGDESC) - up = HT.Href("javascript:xmlhttpPost('%smain.py?FormID=AJAX_table', '%s', 'sort=%s&order=up&file=%s&tableID=%s&addIndex=%s&hiddenColumns=%s')" % (webqtlConfig.CGIDIR, tableID, item.text, file, tableID, addIndex, hiddenColumnsString),IMGASC) - if item.text == field: - idx = item.idx - last_idx = idx - if order == 'up': - up = IMGASCON - elif order == 'down': - down = IMGDESCON - item.html.append(HT.Div(up, down, style="float: bottom;")) - hr.append(item.html) - else: - hiddenColumnIdx.append(i) - tbl.append(hr) - - for i, row in enumerate(body): - for j, item in enumerate(row): - if order == 'down': - if (item.val == '' or item.val == 'x' or item.val == 'None'): - item.val = 0 - if order == 'up': - if (item.val == '' or item.val == 'x' or item.val == 'None'): - item.val = 'zzzzz' - - if idx >= 0: - if order == 'down': - body.sort(lambda A, B: cmp(B[idx].val, A[idx].val), key=natsort_key) - elif order == 'up': - body.sort(lambda A, B: cmp(A[idx].val, B[idx].val), key=natsort_key) - else: - pass - - for i, row in enumerate(body): - hr = HT.TR(Id = row[0].text) - for j, item in enumerate(row): - if (j not in hiddenColumnIdx): - if j == 0: - if addIndex == "1": - item.html.contents = [i+1] + item.html.contents - hr.append(item.html) - tbl.append(hr) - - return tbl - -def natsort_key(string): - r = [] - for c in string: - try: - c = int(c) - try: r[-1] = r[-1] * 10 + c - except: r.append(c) - except: - r.append(c) - return r + return access_to_confidential_phenotype_trait
\ No newline at end of file diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index ffc698de..2bd4b721 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -40,13 +40,6 @@ import logging from utility.logger import getLogger logger = getLogger(__name__) -def get_collection(): - if g.user_session.logged_in: - return UserCollection() - else: - return AnonCollection() - #else: - # CauseError class AnonCollection(object): """User is not logged in""" @@ -130,10 +123,6 @@ class AnonCollection(object): collections_list.append(collection_dict) Redis.set(self.key, json.dumps(collections_list)) - #Redis.sadd(self.key, *list(traits)) - #Redis.expire(self.key, 60 * 60 * 24 * 5) - #len_now = len(Redis.smembers(self.key)) - #report_change(len_before, len_now) def remove_traits(self, params): traits_to_remove = [(":").join(trait.split(":")[:2]) for trait in params.getlist('traits[]')] @@ -282,7 +271,6 @@ def create_new(collection_name): db_session.commit() return redirect(url_for('view_collection', uc_id=uc.id)) else: - current_collections = user_manager.AnonUser().get_collections() ac = AnonCollection(collection_name) ac.changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') ac.add_traits(params) diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index a08cd759..94711c67 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -19,37 +19,31 @@ class CorrScatterPlot(object): width = int(params['width']) except: width = 800 - self.width = width try: height = int(params['height']) except: height = 600 - self.height = height try: circle_color = params['circle_color'] except: circle_color = '#3D85C6' - self.circle_color = circle_color try: circle_radius = int(params['circle_radius']) except: circle_radius = 5 - self.circle_radius = circle_radius try: line_color = params['line_color'] except: line_color = '#FF0000' - self.line_color = line_color try: line_width = int(params['line_width']) except: line_width = 1 - self.line_width = line_width samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) @@ -66,14 +60,14 @@ class CorrScatterPlot(object): x = np.array(vals_1) y = np.array(vals_2) - slope, intercept, r_value, p_value, std_err = stats.linregress(x, y) + slope, intercept, r_value, p_value, _std_err = stats.linregress(x, y) rx = stats.rankdata(x) ry = stats.rankdata(y) self.rdata = [] self.rdata.append(rx.tolist()) self.rdata.append(ry.tolist()) - srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry) + srslope, srintercept, srr_value, srp_value, _srstd_err = stats.linregress(rx, ry) self.js_data = dict( data = self.data, @@ -86,17 +80,17 @@ class CorrScatterPlot(object): num_overlap = num_overlap, vals_1 = vals_1, vals_2 = vals_2, - + slope = slope, intercept = intercept, r_value = r_value, p_value = p_value, - + srslope = srslope, srintercept = srintercept, srr_value = srr_value, srp_value = srp_value, - + width = width, height = height, circle_color = circle_color, diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index 80a0818c..06dec795 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -28,524 +28,12 @@ from __future__ import absolute_import, print_function, division import math import rpy2.robjects -import pp import string -from utility import webqtlUtil from base.mrna_assay_tissue_data import MrnaAssayTissueData -from base.trait import GeneralTrait -from db import webqtlDatabaseFunction from flask import Flask, g -#XZ: The input 'controls' is String. It contains the full name of control traits. -#XZ: The input variable 'strainlst' is List. It contains the strain names of primary trait. -#XZ: The returned tcstrains is the list of list [[],[]...]. So are tcvals and tcvars. The last returned parameter is list of numbers. -#XZ, 03/29/2010: For each returned control trait, there is no None value in it. -def controlStrains(controls, strainlst): - - controls = controls.split(',') - - cvals = {} - for oneTraitName in controls: - oneTrait = webqtlTrait(fullname=oneTraitName, cursor=webqtlDatabaseFunction.getCursor() ) - oneTrait.retrieveData() - cvals[oneTraitName] = oneTrait.data - - tcstrains = [] - tcvals = [] - tcvars = [] - - for oneTraitName in controls: - strains = [] - vals = [] - vars = [] - - for _strain in strainlst: - if cvals[oneTraitName].has_key(_strain): - _val = cvals[oneTraitName][_strain].val - if _val != None: - strains.append(_strain) - vals.append(_val) - vars.append(None) - - tcstrains.append(strains) - tcvals.append(vals) - tcvars.append(vars) - - return tcstrains, tcvals, tcvars, [len(x) for x in tcstrains] - - - -#XZ, 03/29/2010: After execution of functon "controlStrains" and "fixStrains", primary trait and control traits have the same strains and in the same order. There is no 'None' value in them. -def fixStrains(_strains,_controlstrains,_vals,_controlvals,_vars,_controlvars): - """Corrects strains, vals, and vars so that all contrain only those strains common - to the reference trait and all control traits.""" - - def dictify(strains,vals,vars): - subdict = {} - for i in xrange(len(strains)): - subdict[strains[i]] = (vals[i],vars[i]) - return subdict - - #XZ: The 'dicts' is a list of dictionary. The first element is the dictionary of reference trait. The rest elements are for control traits. - dicts = [] - dicts.append(dictify(_strains,_vals,_vars)) - - nCstrains = len(_controlstrains) - for i in xrange(nCstrains): - dicts.append(dictify(_controlstrains[i],_controlvals[i],_controlvars[i])) - - _newstrains = [] - _vals = [] - _vars = [] - _controlvals = [[] for x in xrange(nCstrains)] - _controlvars = [[] for x in xrange(nCstrains)] - - for strain in _strains: - inall = True - for d in dicts: - if strain not in d: - inall = False - break - if inall: - _newstrains.append(strain) - _vals.append(dicts[0][strain][0]) - _vars.append(dicts[0][strain][1]) - for i in xrange(nCstrains): - _controlvals[i].append(dicts[i+1][strain][0]) - _controlvars[i].append(dicts[i+1][strain][1]) - - return _newstrains, _vals, _controlvals, _vars, _controlvars - - -#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty. -#else, the returned list has two elements of control trait name. -def findIdenticalControlTraits ( controlVals, controlNames ): - nameOfIdenticalTraits = [] - - controlTraitNumber = len(controlVals) - - if controlTraitNumber > 1: - - #XZ: reset the precision of values and convert to string type - for oneTraitVal in controlVals: - for oneStrainVal in oneTraitVal: - oneStrainVal = '%.3f' % oneStrainVal - - for i, oneTraitVal in enumerate( controlVals ): - for j in range(i+1, controlTraitNumber): - if oneTraitVal == controlVals[j]: - nameOfIdenticalTraits.append(controlNames[i]) - nameOfIdenticalTraits.append(controlNames[j]) - - return nameOfIdenticalTraits - -#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty. -#else, the returned list has two elements of control trait name. -#primaryVal is of list type. It contains value of primary trait. -#primaryName is of string type. -#controlVals is of list type. Each element is list too. Each element contain value of one control trait. -#controlNames is of list type. -def findIdenticalTraits (primaryVal, primaryName, controlVals, controlNames ): - nameOfIdenticalTraits = [] - - #XZ: reset the precision of values and convert to string type - for oneStrainVal in primaryVal: - oneStrainVal = '%.3f' % oneStrainVal - - for oneTraitVal in controlVals: - for oneStrainVal in oneTraitVal: - oneStrainVal = '%.3f' % oneStrainVal - - controlTraitNumber = len(controlVals) - - if controlTraitNumber > 1: - for i, oneTraitVal in enumerate( controlVals ): - for j in range(i+1, controlTraitNumber): - if oneTraitVal == controlVals[j]: - nameOfIdenticalTraits.append(controlNames[i]) - nameOfIdenticalTraits.append(controlNames[j]) - break - - if len(nameOfIdenticalTraits) == 0: - for i, oneTraitVal in enumerate( controlVals ): - if primaryVal == oneTraitVal: - nameOfIdenticalTraits.append(primaryName) - nameOfIdenticalTraits.append(controlNames[i]) - break - - return nameOfIdenticalTraits - - - -#XZ, 03/29/2010: The strains in primaryVal, controlVals, targetVals must be of the same number and in same order. -#XZ: No value in primaryVal and controlVals could be None. - -def determinePartialsByR (primaryVal, controlVals, targetVals, targetNames, method='p'): - - def compute_partial ( primaryVal, controlVals, targetVals, targetNames, method ): - - rpy2.robjects.r(""" -pcor.test <- function(x,y,z,use="mat",method="p",na.rm=T){ - # The partial correlation coefficient between x and y given z - # - # pcor.test is free and comes with ABSOLUTELY NO WARRANTY. - # - # x and y should be vectors - # - # z can be either a vector or a matrix - # - # use: There are two methods to calculate the partial correlation coefficient. - # One is by using variance-covariance matrix ("mat") and the other is by using recursive formula ("rec"). - # Default is "mat". - # - # method: There are three ways to calculate the correlation coefficient, - # which are Pearson's ("p"), Spearman's ("s"), and Kendall's ("k") methods. - # The last two methods which are Spearman's and Kendall's coefficient are based on the non-parametric analysis. - # Default is "p". - # - # na.rm: If na.rm is T, then all the missing samples are deleted from the whole dataset, which is (x,y,z). - # If not, the missing samples will be removed just when the correlation coefficient is calculated. - # However, the number of samples for the p-value is the number of samples after removing - # all the missing samples from the whole dataset. - # Default is "T". - - x <- c(x) - y <- c(y) - z <- as.data.frame(z) - - if(use == "mat"){ - p.use <- "Var-Cov matrix" - pcor = pcor.mat(x,y,z,method=method,na.rm=na.rm) - }else if(use == "rec"){ - p.use <- "Recursive formula" - pcor = pcor.rec(x,y,z,method=method,na.rm=na.rm) - }else{ - stop("use should be either rec or mat!\n") - } - - # print the method - if(gregexpr("p",method)[[1]][1] == 1){ - p.method <- "Pearson" - }else if(gregexpr("s",method)[[1]][1] == 1){ - p.method <- "Spearman" - }else if(gregexpr("k",method)[[1]][1] == 1){ - p.method <- "Kendall" - }else{ - stop("method should be pearson or spearman or kendall!\n") - } - - # sample number - n <- dim(na.omit(data.frame(x,y,z)))[1] - - # given variables' number - gn <- dim(z)[2] - - # p-value - if(p.method == "Kendall"){ - statistic <- pcor/sqrt(2*(2*(n-gn)+5)/(9*(n-gn)*(n-1-gn))) - p.value <- 2*pnorm(-abs(statistic)) - - }else{ - statistic <- pcor*sqrt((n-2-gn)/(1-pcor^2)) - p.value <- 2*pnorm(-abs(statistic)) - } - - data.frame(estimate=pcor,p.value=p.value,statistic=statistic,n=n,gn=gn,Method=p.method,Use=p.use) -} - -# By using var-cov matrix -pcor.mat <- function(x,y,z,method="p",na.rm=T){ - - x <- c(x) - y <- c(y) - z <- as.data.frame(z) - - if(dim(z)[2] == 0){ - stop("There should be given data\n") - } - - data <- data.frame(x,y,z) - - if(na.rm == T){ - data = na.omit(data) - } - - xdata <- na.omit(data.frame(data[,c(1,2)])) - Sxx <- cov(xdata,xdata,m=method) - - xzdata <- na.omit(data) - xdata <- data.frame(xzdata[,c(1,2)]) - zdata <- data.frame(xzdata[,-c(1,2)]) - Sxz <- cov(xdata,zdata,m=method) - - zdata <- na.omit(data.frame(data[,-c(1,2)])) - Szz <- cov(zdata,zdata,m=method) - - # is Szz positive definite? - zz.ev <- eigen(Szz)$values - if(min(zz.ev)[1]<0){ - stop("\'Szz\' is not positive definite!\n") - } - - # partial correlation - Sxx.z <- Sxx - Sxz %*% solve(Szz) %*% t(Sxz) - - rxx.z <- cov2cor(Sxx.z)[1,2] - - rxx.z -} - -# By using recursive formula -pcor.rec <- function(x,y,z,method="p",na.rm=T){ - # - - x <- c(x) - y <- c(y) - z <- as.data.frame(z) - - if(dim(z)[2] == 0){ - stop("There should be given data\n") - } - - data <- data.frame(x,y,z) - - if(na.rm == T){ - data = na.omit(data) - } - - # recursive formula - if(dim(z)[2] == 1){ - tdata <- na.omit(data.frame(data[,1],data[,2])) - rxy <- cor(tdata[,1],tdata[,2],m=method) - - tdata <- na.omit(data.frame(data[,1],data[,-c(1,2)])) - rxz <- cor(tdata[,1],tdata[,2],m=method) - - tdata <- na.omit(data.frame(data[,2],data[,-c(1,2)])) - ryz <- cor(tdata[,1],tdata[,2],m=method) - - rxy.z <- (rxy - rxz*ryz)/( sqrt(1-rxz^2)*sqrt(1-ryz^2) ) - - return(rxy.z) - }else{ - x <- c(data[,1]) - y <- c(data[,2]) - z0 <- c(data[,3]) - zc <- as.data.frame(data[,-c(1,2,3)]) - - rxy.zc <- pcor.rec(x,y,zc,method=method,na.rm=na.rm) - rxz0.zc <- pcor.rec(x,z0,zc,method=method,na.rm=na.rm) - ryz0.zc <- pcor.rec(y,z0,zc,method=method,na.rm=na.rm) - - rxy.z <- (rxy.zc - rxz0.zc*ryz0.zc)/( sqrt(1-rxz0.zc^2)*sqrt(1-ryz0.zc^2) ) - return(rxy.z) - } -} -""") - - R_pcorr_function = rpy2.robjects.r['pcor.test'] - R_corr_test = rpy2.robjects.r['cor.test'] - - primary = rpy2.robjects.FloatVector(range(len(primaryVal))) - for i in range(len(primaryVal)): - primary[i] = primaryVal[i] - - control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(controlVals[0])) ), ncol=len(controlVals)) - for i in range(len(controlVals)): - for j in range(len(controlVals[0])): - control[i*len(controlVals[0]) + j] = controlVals[i][j] - - allcorrelations = [] - - for targetIndex, oneTargetVals in enumerate(targetVals): - - this_primary = None - this_control = None - this_target = None - - if None in oneTargetVals: - - goodIndex = [] - for i in range(len(oneTargetVals)): - if oneTargetVals[i] != None: - goodIndex.append(i) - - this_primary = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - this_primary[i] = primaryVal[goodIndex[i]] - - this_control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(goodIndex)) ), ncol=len(controlVals)) - for i in range(len(controlVals)): - for j in range(len(goodIndex)): - this_control[i*len(goodIndex) + j] = controlVals[i][goodIndex[j]] - - this_target = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - this_target[i] = oneTargetVals[goodIndex[i]] - - else: - this_primary = primary - this_control = control - this_target = rpy2.robjects.FloatVector(range(len(oneTargetVals))) - for i in range(len(oneTargetVals)): - this_target[i] = oneTargetVals[i] - - one_name = targetNames[targetIndex] - one_N = len(this_primary) - - #calculate partial correlation - one_pc_coefficient = 'NA' - one_pc_p = 1 - - try: - if method == 's': - result = R_pcorr_function(this_primary, this_target, this_control, method='s') - else: - result = R_pcorr_function(this_primary, this_target, this_control) - - #XZ: In very few cases, the returned coefficient is nan. - #XZ: One way to detect nan is to compare the number to itself. NaN is always != NaN - if result[0][0] == result[0][0]: - one_pc_coefficient = result[0][0] - #XZ: when the coefficient value is 1 (primary trait and target trait are the same), - #XZ: occationally, the returned p value is nan instead of 0. - if result[1][0] == result[1][0]: - one_pc_p = result[1][0] - elif abs(one_pc_coefficient - 1) < 0.0000001: - one_pc_p = 0 - except: - pass - - #calculate zero order correlation - one_corr_coefficient = 0 - one_corr_p = 1 - - try: - if method == 's': - R_result = R_corr_test(this_primary, this_target, method='spearman') - else: - R_result = R_corr_test(this_primary, this_target) - - one_corr_coefficient = R_result[3][0] - one_corr_p = R_result[2][0] - except: - pass - - traitinfo = [ one_name, one_N, one_pc_coefficient, one_pc_p, one_corr_coefficient, one_corr_p ] - - allcorrelations.append(traitinfo) - - return allcorrelations - #End of function compute_partial - - - allcorrelations = [] - - target_trait_number = len(targetVals) - - if target_trait_number < 1000: - allcorrelations = compute_partial ( primaryVal, controlVals, targetVals, targetNames, method ) - else: - step = 1000 - job_number = math.ceil( float(target_trait_number)/step ) - - job_targetVals_lists = [] - job_targetNames_lists = [] - - for job_index in range( int(job_number) ): - starti = job_index*step - endi = min((job_index+1)*step, target_trait_number) - - one_job_targetVals_list = [] - one_job_targetNames_list = [] - - for i in range( starti, endi ): - one_job_targetVals_list.append( targetVals[i] ) - one_job_targetNames_list.append( targetNames[i] ) - - job_targetVals_lists.append( one_job_targetVals_list ) - job_targetNames_lists.append( one_job_targetNames_list ) - - ppservers = () - # Creates jobserver with automatically detected number of workers - job_server = pp.Server(ppservers=ppservers) - - jobs = [] - results = [] - - for i, one_job_targetVals_list in enumerate( job_targetVals_lists ): - one_job_targetNames_list = job_targetNames_lists[i] - #pay attention to modules from outside - jobs.append( job_server.submit(func=compute_partial, args=( primaryVal, controlVals, one_job_targetVals_list, one_job_targetNames_list, method), depfuncs=(), modules=("rpy2.robjects",)) ) - - for one_job in jobs: - one_result = one_job() - results.append( one_result ) - - for one_result in results: - for one_traitinfo in one_result: - allcorrelations.append( one_traitinfo ) - - return allcorrelations - - - -#XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait -#XZ: The primaryTrait and targetTrait should have executed retrieveData function -def calZeroOrderCorr(primaryTrait, targetTrait, method='pearson'): - - #primaryTrait.retrieveData() - - #there is no None value in primary_val - primary_strain, primary_val, primary_var = primaryTrait.exportInformative() - - #targetTrait.retrieveData() - - #there might be None value in target_val - target_val = targetTrait.exportData(primary_strain, type="val") - - R_primary = rpy2.robjects.FloatVector(range(len(primary_val))) - for i in range(len(primary_val)): - R_primary[i] = primary_val[i] - - N = len(target_val) - - if None in target_val: - goodIndex = [] - for i in range(len(target_val)): - if target_val[i] != None: - goodIndex.append(i) - - N = len(goodIndex) - - R_primary = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - R_primary[i] = primary_val[goodIndex[i]] - - R_target = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - R_target[i] = target_val[goodIndex[i]] - - else: - R_target = rpy2.robjects.FloatVector(range(len(target_val))) - for i in range(len(target_val)): - R_target[i] = target_val[i] - - R_corr_test = rpy2.robjects.r['cor.test'] - - if method == 'spearman': - R_result = R_corr_test(R_primary, R_target, method='spearman') - else: - R_result = R_corr_test(R_primary, R_target) - - corr_result = [] - corr_result.append( R_result[3][0] ) - corr_result.append( N ) - corr_result.append( R_result[2][0] ) - - return corr_result ##################################################################################### #Input: primaryValue(list): one list of expression values of one probeSet, @@ -585,170 +73,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears return corr_result - - -def batchCalTissueCorr(primaryTraitValue=[], SymbolValueDict={}, method='pearson'): - - def cal_tissue_corr(primaryTraitValue, oneSymbolValueDict, method ): - - oneSymbolCorrDict = {} - oneSymbolPvalueDict = {} - - R_corr_test = rpy2.robjects.r['cor.test'] - - R_primary = rpy2.robjects.FloatVector(range(len(primaryTraitValue))) - - for i in range(len(primaryTraitValue)): - R_primary[i] = primaryTraitValue[i] - - for (oneTraitSymbol, oneTraitValue) in oneSymbolValueDict.iteritems(): - R_target = rpy2.robjects.FloatVector(range(len(oneTraitValue))) - for i in range(len(oneTraitValue)): - R_target[i] = oneTraitValue[i] - - if method =='spearman': - R_result = R_corr_test(R_primary, R_target, method='spearman') - else: - R_result = R_corr_test(R_primary, R_target) - - oneSymbolCorrDict[oneTraitSymbol] = R_result[3][0] - oneSymbolPvalueDict[oneTraitSymbol] = R_result[2][0] - - return(oneSymbolCorrDict, oneSymbolPvalueDict) - - - - symbolCorrDict = {} - symbolPvalueDict = {} - - items_number = len(SymbolValueDict) - - if items_number <= 1000: - symbolCorrDict, symbolPvalueDict = cal_tissue_corr(primaryTraitValue, SymbolValueDict, method) - else: - items_list = SymbolValueDict.items() - - step = 1000 - job_number = math.ceil( float(items_number)/step ) - - job_oneSymbolValueDict_list = [] - - for job_index in range( int(job_number) ): - starti = job_index*step - endi = min((job_index+1)*step, items_number) - - oneSymbolValueDict = {} - - for i in range( starti, endi ): - one_item = items_list[i] - one_symbol = one_item[0] - one_value = one_item[1] - oneSymbolValueDict[one_symbol] = one_value - - job_oneSymbolValueDict_list.append( oneSymbolValueDict ) - - - ppservers = () - # Creates jobserver with automatically detected number of workers - job_server = pp.Server(ppservers=ppservers) - - jobs = [] - results = [] - - for i, oneSymbolValueDict in enumerate( job_oneSymbolValueDict_list ): - - #pay attention to modules from outside - jobs.append( job_server.submit(func=cal_tissue_corr, args=(primaryTraitValue, oneSymbolValueDict, method), depfuncs=(), modules=("rpy2.robjects",)) ) - - for one_job in jobs: - one_result = one_job() - results.append( one_result ) - - for one_result in results: - oneSymbolCorrDict, oneSymbolPvalueDict = one_result - symbolCorrDict.update( oneSymbolCorrDict ) - symbolPvalueDict.update( oneSymbolPvalueDict ) - - return (symbolCorrDict, symbolPvalueDict) - -########################################################################### -#Input: cursor, GeneNameLst (list), TissueProbeSetFreezeId -#output: geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict (Dict) -#function: get multi dicts for short and long label functions, and for getSymbolValuePairDict and -# getGeneSymbolTissueValueDict to build dict to get CorrPvArray -#Note: If there are multiple probesets for one gene, select the one with highest mean. -########################################################################### -def getTissueProbeSetXRefInfo(GeneNameLst=[],TissueProbeSetFreezeId=0): - Symbols ="" - symbolList =[] - geneIdDict ={} - dataIdDict = {} - ChrDict = {} - MbDict = {} - descDict = {} - pTargetDescDict = {} - - count = len(GeneNameLst) - - # Added by NL 01/06/2011 - # Note that:inner join is necessary in this query to get distinct record in one symbol group with highest mean value - # Duo to the limit size of TissueProbeSetFreezeId table in DB, performance of inner join is acceptable. - if count==0: - query=''' - select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description - from ( - select Symbol, max(Mean) as maxmean - from TissueProbeSetXRef - where TissueProbeSetFreezeId=%s and Symbol!='' and Symbol Is Not Null group by Symbol) - as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; - '''%TissueProbeSetFreezeId - - else: - for i, item in enumerate(GeneNameLst): - - if i == count-1: - Symbols += "'%s'" %item - else: - Symbols += "'%s'," %item - - Symbols = "("+ Symbols+")" - query=''' - select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description - from ( - select Symbol, max(Mean) as maxmean - from TissueProbeSetXRef - where TissueProbeSetFreezeId=%s and Symbol in %s group by Symbol) - as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; - '''% (TissueProbeSetFreezeId,Symbols) - - try: - cursor.execute(query) - results =cursor.fetchall() - resultCount = len(results) - # Key in all dicts is the lower-cased symbol - for i, item in enumerate(results): - symbol = item[0] - symbolList.append(symbol) - - key =symbol.lower() - geneIdDict[key]=item[1] - dataIdDict[key]=item[2] - ChrDict[key]=item[3] - MbDict[key]=item[4] - descDict[key]=item[5] - pTargetDescDict[key]=item[6] - - except: - symbolList = None - geneIdDict=None - dataIdDict=None - ChrDict=None - MbDict=None - descDict=None - pTargetDescDict=None - - return symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict - ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, @@ -757,7 +81,6 @@ def getTissueProbeSetXRefInfo(GeneNameLst=[],TissueProbeSetFreezeId=0): #Attention! All keys are lower case! ########################################################################### def get_symbol_value_pairs(tissue_data): - id_list = [tissue_data[symbol.lower()].data_id for item in tissue_data] symbol_value_pairs = {} @@ -775,23 +98,6 @@ def get_symbol_value_pairs(tissue_data): except: symbol_value_pairs[symbol] = None - #for symbol in symbol_list: - # if tissue_data.has_key(symbol): - # data_id = tissue_data[symbol].data_id - # - # query = """select value, id - # from TissueProbeSetData - # where Id={}""".format(escape(data_id)) - # try : - # results = g.db.execute(query).fetchall() - # for item in results: - # item = item[0] - # value_list.append(item) - # symbol_value_pairs[symbol] = value_list - # value_list=[] - # except: - # symbol_value_pairs[symbol] = None - return symbol_value_pairs @@ -808,150 +114,4 @@ def get_trait_symbol_and_tissue_values(symbol_list=None): tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) if len(tissue_data.gene_symbols): - return tissue_data.get_symbol_values_pairs() - - #symbolList, - #geneIdDict, - #dataIdDict, - #ChrDict, - #MbDict, - #descDict, - #pTargetDescDict = getTissueProbeSetXRefInfo( - # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - #limit_num=1000 - #count = len(symbol_list) - # - #symbol_value_pairs = {} - # - #if count !=0 and count <= limit_num: - # symbol_value_pairs = getSymbolValuePairDict(cursor=cursor,symbolList=symbol_list,dataIdDict=dataIdDict) - # - #elif count > limit_num: - # n = count/limit_num - # start = 0 - # stop = 0 - # - # for i in range(n): - # stop =limit_num*(i+1) - # gList1 = symbolList[start:stop] - # PairDict1 = getSymbolValuePairDict(cursor=cursor,symbolList=gList1,dataIdDict=dataIdDict) - # start =limit_num*(i+1) - # - # SymbolValuePairDict.update(PairDict1) - # - # if stop < count: - # stop = count - # gList2 = symbolList[start:stop] - # PairDict2 = getSymbolValuePairDict(cursor=cursor,symbolList=gList2,dataIdDict=dataIdDict) - # SymbolValuePairDict.update(PairDict2) - # - #return SymbolValuePairDict - -######################################################################################################## -#input: cursor, GeneNameLst (list), TissueProbeSetFreezeId(int) -#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. -# key is symbol, value is one list of expression values of one probeSet. -#function: wrapper function of getGeneSymbolTissueValueDict function -# for CorrelationPage.py -######################################################################################################## - -#def get_trait_symbol_and_tissue_values(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0): -# SymbolValuePairDict={} -# -# symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = getTissueProbeSetXRefInfo( -# cursor=cursor,GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) -# -# if symbolList: -# SymbolValuePairDict = get_gene_symbol_and_tissue_values(symbolList=symbolList, -# dataIdDict=dataIdDict) -# -# return SymbolValuePairDict - -######################################################################################################## -#Input: cursor(cursor): MySQL connnection cursor; -# priGeneSymbolList(list): one list of gene symbol; -# symbolValuepairDict(dictionary): one dictionary of Symbol and Value Pair, -# key is symbol, value is one list of expression values of one probeSet; -#Output: corrArray(array): array of Correlation Value, -# pvArray(array): array of PValue; -#Function: build corrArray, pvArray for display by calling calculation function:calZeroOrderCorrForTiss -######################################################################################################## - -def getCorrPvArray(cursor=None,priGeneSymbolList=[],symbolValuepairDict={}): - # setting initial value for corrArray, pvArray equal to 0 - Num = len(priGeneSymbolList) - - corrArray = [([0] * (Num))[:] for i in range(Num)] - pvArray = [([0] * (Num))[:] for i in range(Num)] - i = 0 - for pkey in priGeneSymbolList: - j = 0 - pkey = pkey.strip().lower()# key in symbolValuepairDict is low case - if symbolValuepairDict.has_key(pkey): - priValue = symbolValuepairDict[pkey] - for tkey in priGeneSymbolList: - tkey = tkey.strip().lower()# key in symbolValuepairDict is low case - if priValue and symbolValuepairDict.has_key(tkey): - tarValue = symbolValuepairDict[tkey] - - if tarValue: - if i>j: - # corrArray stores Pearson Correlation values - # pvArray stores Pearson P-Values - pcorr_result =calZeroOrderCorrForTiss(primaryValue=priValue,targetValue=tarValue) - corrArray[i][j] =pcorr_result[0] - pvArray[i][j] =pcorr_result[2] - elif i<j: - # corrArray stores Spearman Correlation values - # pvArray stores Spearman P-Values - scorr_result =calZeroOrderCorrForTiss(primaryValue=priValue,targetValue=tarValue,method='spearman') - corrArray[i][j] =scorr_result[0] - pvArray[i][j] =scorr_result[2] - else: - # on the diagonal line, correlation value is 1, P-Values is 0 - corrArray[i][j] =1 - pvArray[i][j] =0 - j+=1 - else: - corrArray[i][j] = None - pvArray[i][j] = None - j+=1 - else: - corrArray[i][j] = None - pvArray[i][j] = None - j+=1 - else: - corrArray[i][j] = None - pvArray[i][j] = None - - i+=1 - - return corrArray, pvArray - -######################################################################################################## -#Input: cursor(cursor): MySQL connnection cursor; -# primaryTraitSymbol(string): one gene symbol; -# TissueProbeSetFreezeId (int): Id of related TissueProbeSetFreeze -# method: '0' default value, Pearson Correlation; '1', Spearman Correlation -#Output: symbolCorrDict(Dict): Dict of Correlation Value, key is symbol -# symbolPvalueDict(Dict): Dict of PValue,key is symbol ; -#Function: build symbolCorrDict, symbolPvalueDict for display by calling calculation function:calZeroOrderCorrForTiss -######################################################################################################## -def calculateCorrOfAllTissueTrait(cursor=None, primaryTraitSymbol=None, TissueProbeSetFreezeId=None,method='0'): - - symbolCorrDict = {} - symbolPvalueDict = {} - - primaryTraitSymbolValueDict = getGeneSymbolTissueValueDictForTrait(cursor=cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TissueProbeSetFreezeId) - primaryTraitValue = primaryTraitSymbolValueDict.values()[0] - - SymbolValueDict = getGeneSymbolTissueValueDictForTrait(cursor=cursor, GeneNameLst=[], TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - if method =='1': - symbolCorrDict, symbolPvalueDict = batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman') - else: - symbolCorrDict, symbolPvalueDict = batchCalTissueCorr(primaryTraitValue,SymbolValueDict) - - - return (symbolCorrDict, symbolPvalueDict) + return tissue_data.get_symbol_values_pairs()
\ No newline at end of file diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 73072423..9f3f7982 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -21,9 +21,7 @@ from __future__ import absolute_import, print_function, division import sys -# sys.path.append(".") Never in a running webserver -import gc import string import cPickle import os @@ -64,8 +62,6 @@ from flask import Flask, g import utility.logger logger = utility.logger.getLogger(__name__ ) -METHOD_SAMPLE_PEARSON = "1" -METHOD_SAMPLE_RANK = "2" METHOD_LIT = "3" METHOD_TISSUE_PEARSON = "4" METHOD_TISSUE_RANK = "5" @@ -74,26 +70,7 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 -def print_mem(stage=""): - mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - #print("{}: {}".format(stage, mem/1024)) - -class AuthException(Exception): - pass - class CorrelationResults(object): - - corr_min_informative = 4 - - #PAGE_HEADING = "Correlation Table" - #CORRELATION_METHODS = {"1" : "Genetic Correlation (Pearson's r)", - # "2" : "Genetic Correlation (Spearman's rho)", - # "3" : "SGO Literature Correlation", - # "4" : "Tissue Correlation (Pearson's r)", - # "5" : "Tissue Correlation (Spearman's rho)"} - # - #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1} - def __init__(self, start_vars): # get trait list from db (database name) # calculate correlation with Base vector and targets @@ -167,16 +144,12 @@ class CorrelationResults(object): self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) - # print("self.sample_data.keys: %s" % self.sample_data.keys) self.target_dataset.get_trait_data(self.sample_data.keys()) self.correlation_results = [] self.correlation_data = {} - db_filename = self.getFileName(target_db_name = self.target_dataset.name) - cache_available = db_filename in os.listdir(webqtlConfig.GENERATED_TEXT_DIR) - if self.corr_type == "tissue": self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol") @@ -196,24 +169,6 @@ class CorrelationResults(object): self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": - #ZS: Commented out since parallel correlation has issues with gunicorn - # if self.dataset.type == "ProbeSet" and cache_available: - # dataset_file = open(webqtlConfig.GENERATED_TEXT_DIR+db_filename,'r') - - ##XZ, 01/08/2009: read the first line - # line = dataset_file.readline() - # dataset_strains = webqtlUtil.readLineCSV(line)[1:] - - # self.this_trait_vals = [] - # for item in dataset_strains: - # if item in self.sample_data: - # self.this_trait_vals.append(self.sample_data[item]) - # else: - # self.this_trait_vals.append("None") - # num_overlap = len(self.this_trait_vals) - # logger.debug("DOING PARALLEL") - # self.do_parallel_correlation(db_filename, num_overlap) - # else: for trait, values in self.target_dataset.trait_data.iteritems(): self.get_sample_r_and_p_values(trait, values) @@ -256,9 +211,6 @@ class CorrelationResults(object): trait_object.sample_p, trait_object.num_overlap) = self.correlation_data[trait] - #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, - #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) - # Set some sane defaults trait_object.tissue_corr = 0 trait_object.tissue_pvalue = 0 @@ -274,9 +226,6 @@ class CorrelationResults(object): trait_object.sample_p, trait_object.num_overlap) = self.correlation_data[trait] - #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, - #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) - # Set some sane defaults trait_object.tissue_corr = 0 trait_object.tissue_pvalue = 0 @@ -296,60 +245,8 @@ class CorrelationResults(object): if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": self.do_tissue_correlation_for_trait_list() - #print("self.correlation_results: ", pf(self.correlation_results)) - self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset) - #XZ, 09/18/2008: get all information about the user selected database. - #target_db_name = fd.corr_dataset - #self.target_db_name = start_vars['corr_dataset'] - - # Zach said this is ok - # Auth if needed - #try: - # auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName) - #except AuthException as e: - # detail = [e.message] - # return self.error(detail) - - #XZ, 09/18/2008: filter out the strains that have no value. - #self.sample_names, vals, vars, N = fd.informativeStrains(sample_list) - - #print("samplenames is:", pf(self.sample_names)) - #CF - If less than a minimum number of strains/cases in common, don't calculate anything - #if len(self.sample_names) < self.corr_min_informative: - # detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)] - # self.error(heading=None, detail=detail) - - #correlation_method = self.CORRELATION_METHODS[self.method] - #rankOrder = self.RANK_ORDERS[self.method] - - # CF - Number of results returned - # Todo: Get rid of self.returnNumber - - #self.record_count = 0 - - #myTrait = get_custom_trait(fd, self.cursor) - - - # We will not get Literature Correlations if there is no GeneId because there is nothing - # to look against - #self.geneid = self.this_trait.geneid - - # We will not get Tissue Correlations if there is no gene symbol because there is nothing to look against - #self.trait_symbol = myTrait.symbol - - - #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid - #self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.dataset.group.species, self.geneid) - - #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' - #self.tissue_probeset_freeze_id = 1 - - #traitList = self.correlate() - - #print("Done doing correlation calculation") - ############################################################################################################################################ def get_formatted_corr_type(self): @@ -375,13 +272,6 @@ class CorrelationResults(object): if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - - #gene_symbol_list = [] - # - #for trait in self.correlation_results: - # if hasattr(trait, 'symbol'): - # gene_symbol_list.append(trait.symbol) - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( @@ -398,17 +288,6 @@ class CorrelationResults(object): trait.tissue_corr = result[0] trait.tissue_pvalue = result[2] - # else: - # trait.tissue_corr = None - # trait.tissue_pvalue = None - #else: - # for trait in self.correlation_results: - # trait.tissue_corr = None - # trait.tissue_pvalue = None - - #return self.correlation_results - - def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1): #Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( @@ -429,7 +308,6 @@ class CorrelationResults(object): for trait, symbol in self.trait_symbol_dict.iteritems(): if symbol and symbol.lower() in corr_result_tissue_vals_dict: this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] - #print("this_trait_tissue_values: ", pf(this_trait_tissue_values)) result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, @@ -442,7 +320,6 @@ class CorrelationResults(object): return tissue_corr_data - def do_lit_correlation_for_trait_list(self): input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) @@ -581,234 +458,6 @@ class CorrelationResults(object): if num_overlap > 5: self.correlation_data[trait] = [sample_r, sample_p, num_overlap] - - """ - correlations = [] - - #XZ: Use the fast method only for probeset dataset, and this dataset must have been created. - #XZ: Otherwise, use original method - #print("Entering correlation") - - #db_filename = self.getFileName(target_db_name=self.target_db_name) - # - #cache_available = db_filename in os.listdir(webqtlConfig.GENERATED_TEXT_DIR) - - # If the cache file exists, do a cached correlation for probeset data - if self.dataset.type == "ProbeSet": -# if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available: -# traits = do_parallel_correlation() -# -# else: - - traits = self.get_traits(self.vals) - - for trait in traits: - trait.calculate_correlation(vals, self.method) - - self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later - - #XZ, 3/31/2010: Theoretically, we should create one function 'comTissueCorr' - #to compare each trait by their tissue corr p values. - #But because the tissue corr p values are generated by permutation test, - #the top ones always have p value 0. So comparing p values actually does nothing. - #In addition, for the tissue data in our database, the N is always the same. - #So it's safe to compare with tissue corr statistic value. - #That's the same as literature corr. - #if self.method in [METHOD_LIT, METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] and self.gene_id: - # traits.sort(webqtlUtil.cmpLitCorr) - #else: - #if self.method in TISSUE_METHODS: - # sort(traits, key=lambda A: math.fabs(A.tissue_corr)) - #elif self.method == METHOD_LIT: - # traits.sort(traits, key=lambda A: math.fabs(A.lit_corr)) - #else: - traits = sortTraitCorrelations(traits, self.method) - - # Strip to the top N correlations - traits = traits[:min(self.returnNumber, len(traits))] - - addLiteratureCorr = False - addTissueCorr = False - - trait_list = [] - for trait in traits: - db_trait = webqtlTrait(db=self.db, name=trait.name, cursor=self.cursor) - db_trait.retrieveInfo( QTL='Yes' ) - - db_trait.Name = trait.name - db_trait.corr = trait.correlation - db_trait.nOverlap = trait.overlap - db_trait.corrPValue = trait.p_value - - # NL, 07/19/2010 - # js function changed, add a new parameter rankOrder for js function 'showTissueCorrPlot' - db_trait.RANK_ORDER = self.RANK_ORDERS[self.method] - - #XZ, 26/09/2008: Method is 4 or 5. Have fetched tissue corr, but no literature correlation yet. - if self.method in TISSUE_METHODS: - db_trait.tissueCorr = trait.tissue_corr - db_trait.tissuePValue = trait.p_tissue - addTissueCorr = True - - - #XZ, 26/09/2008: Method is 3, Have fetched literature corr, but no tissue corr yet. - elif self.method == METHOD_LIT: - db_trait.LCorr = trait.lit_corr - db_trait.mouse_geneid = self.translateToMouseGeneID(self.species, db_trait.geneid) - addLiteratureCorr = True - - #XZ, 26/09/2008: Method is 1 or 2. Have NOT fetched literature corr and tissue corr yet. - # Phenotype data will not have geneid, and neither will some probes - # we need to handle this because we will get an attribute error - else: - if self.input_trait_mouse_gene_id and self.db.type=="ProbeSet": - addLiteratureCorr = True - if self.trait_symbol and self.db.type=="ProbeSet": - addTissueCorr = True - - trait_list.append(db_trait) - - if addLiteratureCorr: - trait_list = self.getLiteratureCorrelationByList(self.input_trait_mouse_gene_id, - self.species, trait_list) - if addTissueCorr: - trait_list = self.getTissueCorrelationByList( - primaryTraitSymbol = self.trait_symbol, - traitList = trait_list, - TissueProbeSetFreezeId = TISSUE_MOUSE_DB, - method=self.method) - - return trait_list - """ - - - def do_tissue_corr_for_all_traits_2(self): - """Comments Possibly Out of Date!!!!! - - Uses get_temp_tissue_corr_table to generate table of tissue correlations - - This function then gathers that data and pairs it with the TraitID string. - Takes as its arguments a formdata instance, and a dataset instance. - Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) - for the requested correlation - - Used when the user selects the tissue correlation method; i.e. not for the - column that is appended to all probeset trait correlation tables - - """ - - # table name string - temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB, - method=method) - - query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue - FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol - WHERE ProbeSetFreeze.Name = '{}' - and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSet.Symbol IS NOT NULL - and {}.Correlation IS NOT NULL""".format(dataset.mescape( - temp_table, temp_table, temp_table, temp_table, - self.dataset.name, temp_table)) - - results = g.db.execute(query).fetchall() - - tissue_corr_dict = {} - - for entry in results: - trait_name, tissue_corr, tissue_pvalue = entry - tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue) - #symbolList, - #geneIdDict, - #dataIdDict, - #ChrDict, - #MbDict, - #descDict, - #pTargetDescDict = getTissueProbeSetXRefInfo( - # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table))) - - return tissue_corr_dict - - - #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2 - #XZ, 09/24/2008: Note that the correlation value can be negative. - def get_temp_tissue_corr_table(self, - tissue_probesetfreeze_id=0, - method="", - return_number=0): - - - def cmp_tisscorr_absolute_value(A, B): - try: - if abs(A[1]) < abs(B[1]): return 1 - elif abs(A[1]) == abs(B[1]): - return 0 - else: return -1 - except: - return 0 - - symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues( - tissue_dataset_id=TISSUE_MOUSE_DB) - - symbol_corr_list = symbol_corr_dict.items() - - symbol_corr_list.sort(cmp_tisscorr_absolute_value) - symbol_corr_list = symbol_corr_list[0 : 2*return_number] - - tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE") - - q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name - self.cursor.execute(q1) - - for one_pair in symbol_corr_list: - one_symbol = one_pair[0] - one_corr = one_pair[1] - one_p_value = symbol_pvalue_dict[one_symbol] - - self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) ) - - return tmp_table_name - - - def calculate_corr_for_all_tissues(self, tissue_dataset_id=None): - - symbol_corr_dict = {} - symbol_pvalue_dict = {} - - primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( - GeneNameLst=[self.this_trait.symbol], - TissueProbeSetFreezeId=tissue_dataset_id) - primary_trait_value = primary_trait_symbol_value_dict.values()[0] - - symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( - gene_name_list=[], - tissue_dataset_id=tissue_dataset_id) - - symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( - primaryTraitValue, - SymbolValueDict, - method=self.corr_method) - #else: - # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( - # primaryTraitValue, - # SymbolValueDict) - - return (symbolCorrDict, symbolPvalueDict) - - ##XZ, 12/16/2008: the input geneid is of mouse type - #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): - # q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) - # self.cursor.execute(q) - # try: - # x = self.cursor.fetchone() - # if x: return True - # else: raise - # except: return False - - def process_samples(self, start_vars, sample_names, excluded_samples=None): if not excluded_samples: excluded_samples = () @@ -821,581 +470,6 @@ class CorrelationResults(object): if not value.strip().lower() == 'x': self.sample_data[str(sample)] = float(value) - ##XZ, 12/16/2008: the input geneid is of mouse type - #def checkForLitInfo(self,geneId): - # q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId - # self.cursor.execute(q) - # try: - # x = self.cursor.fetchone() - # if x: return True - # else: raise - # except: return False - - - - def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId): - - StrainIds = [] - for item in strains: - self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species)) - Id = self.cursor.fetchone()[0] - StrainIds.append('%d' % Id) - - # break it into smaller chunks so we don't overload the MySql server - nnn = len(StrainIds) / 25 - if len(StrainIds) % 25: - nnn += 1 - oridata = [] - - #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId - tempTable = None - if GeneId and db.type == "ProbeSet": - if method == "3": - tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber) - - if method == "4" or method == "5": - tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber) - - for step in range(nnn): - temp = [] - StrainIdstep = StrainIds[step*25:min(len(StrainIds), (step+1)*25)] - for item in StrainIdstep: temp.append('T%s.value' % item) - - if db.type == "Publish": - query = "SELECT PublishXRef.Id, " - dataStartPos = 1 - query += string.join(temp,', ') - query += ' FROM (PublishXRef, PublishFreeze)' - #XZ, 03/04/2009: Xiaodong changed Data to PublishData - for item in StrainIdstep: - query += 'left join PublishData as T%s on T%s.Id = PublishXRef.DataId and T%s.StrainId=%s\n' %(item,item,item,item) - query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, ) - #XZ, 09/20/2008: extract literature correlation value together with gene expression values. - #XZ, 09/20/2008: notice the difference between the code in next block. - #elif tempTable: - # # we can get a little performance out of selecting our LitCorr here - # # but also we need to do this because we are unconcerned with probes that have no geneId associated with them - # # as we would not have litCorr data. - # - # if method == "3": - # query = "SELECT %s.Name, %s.value," % (db.type,tempTable) - # dataStartPos = 2 - # if method == "4" or method == "5": - # query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) - # dataStartPos = 3 - # - # query += string.join(temp,', ') - # query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - # if method == "3": - # query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) - # if method == "4" or method == "5": - # query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) - # #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) - # for item in StrainIdstep: - # query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) - # - # if method == "3": - # query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - # if method == "4" or method == "5": - # query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - else: - query = "SELECT %s.Name," % db.type - dataStartPos = 1 - query += string.join(temp,', ') - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) - for item in StrainIdstep: - query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) - query += "WHERE %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - - self.cursor.execute(query) - results = self.cursor.fetchall() - oridata.append(results) - - datasize = len(oridata[0]) - traits = [] - # put all of the separate data together into a huge list of lists - for j in range(datasize): - traitdata = list(oridata[0][j]) - for i in range(1,nnn): - traitdata += list(oridata[i][j][dataStartPos:]) - - trait = Trait(traitdata[0], traitdata[dataStartPos:]) - - if method == METHOD_LIT: - trait.lit_corr = traitdata[1] - - if method in TISSUE_METHODS: - trait.tissue_corr = traitdata[1] - trait.p_tissue = traitdata[2] - - traits.append(trait) - - if tempTable: - self.cursor.execute( 'DROP TEMPORARY TABLE %s' % tempTable ) - - return traits - - - - - # XZ, 09/20/2008: This function creates TEMPORARY TABLE tmpTableName_2 and return its name. - # XZ, 09/20/2008: It stores top literature correlation values associated with the input geneId. - # XZ, 09/20/2008: Attention: In each row, the input geneId is always in column GeneId1. - #XZ, 12/16/2008: the input geneid can be of mouse, rat or human type - def getTempLiteratureTable(self, species, input_species_geneid, returnNumber): - # according to mysql the TEMPORARY TABLE name should not have to be unique because - # it is only available to the current connection. This program will be invoked via command line, but if it - # were to be invoked over mod_python this could cuase problems. mod_python will keep the connection alive - # in its executing threads ( i think) so there is a potential for the table not being dropped between users. - #XZ, 01/29/2009: To prevent the potential risk, I generate random table names and drop the tables after use them. - - - # the 'input_species_geneid' could be rat or human geneid, need to translate it to mouse geneid - translated_mouse_geneid = self.translateToMouseGeneID (species, input_species_geneid) - - tmpTableName_1 = webqtlUtil.genRandStr(prefix="LITERATURE") - - q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_1 - q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName_1, translated_mouse_geneid) - q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName_1, translated_mouse_geneid,translated_mouse_geneid) - for x in [q1,q2,q3]: self.cursor.execute(x) - - #XZ, 09/23/2008: Just use the top records insteard of using all records - tmpTableName_2 = webqtlUtil.genRandStr(prefix="TOPLITERATURE") - - q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_2 - self.cursor.execute(q1) - q2 = 'SELECT GeneId1, GeneId2, value FROM %s ORDER BY value DESC' % tmpTableName_1 - self.cursor.execute(q2) - result = self.cursor.fetchall() - - counter = 0 #this is to count how many records being inserted into table - for one_row in result: - mouse_geneid1, mouse_geneid2, lit_corr_alue = one_row - - #mouse_geneid1 has been tested before, now should test if mouse_geneid2 has corresponding geneid in other species - translated_species_geneid = 0 - if species == 'mouse': - translated_species_geneid = mouse_geneid2 - elif species == 'rat': - self.cursor.execute( "SELECT rat FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) ) - record = self.cursor.fetchone() - if record: - translated_species_geneid = record[0] - elif species == 'human': - self.cursor.execute( "SELECT human FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) ) - record = self.cursor.fetchone() - if record: - translated_species_geneid = record[0] - - if translated_species_geneid: - self.cursor.execute( 'INSERT INTO %s (GeneId1, GeneId2, value) VALUES (%d,%d,%f)' % (tmpTableName_2, int(input_species_geneid),int(translated_species_geneid), float(lit_corr_alue)) ) - counter = counter + 1 - - #pay attention to the number - if (counter > 2*returnNumber): - break - - self.cursor.execute('DROP TEMPORARY TABLE %s' % tmpTableName_1) - - return tmpTableName_2 - - - - #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it. - def fetchLitCorrelations(self, species, GeneId, db, returnNumber): ### Used to generate Lit Correlations when calculations are done from text file. dcrowell August 2008 - """Uses getTempLiteratureTable to generate table of literatire correlations. This function then gathers that data and - pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance. - Returns a dictionary of 'TraitID':'LitCorr' for the requested correlation""" - - tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber) - - query = "SELECT %s.Name, %s.value" % (db.type,tempTable) - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) - query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable, db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - - self.cursor.execute(query) - results = self.cursor.fetchall() - - litCorrDict = {} - - for entry in results: - traitName,litcorr = entry - litCorrDict[traitName] = litcorr - - self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable) - - return litCorrDict - - - def get_traits(self, vals): - - #Todo: Redo cached stuff using memcached - if False: - lit_corrs = {} - tissue_corrs = {} - use_lit = False - if self.method == METHOD_LIT: - lit_corrs = self.fetchLitCorrelations(species=self.species, GeneId=self.gene_id, db=self.db, returnNumber=self.returnNumber) - use_lit = True - - use_tissue_corr = False - if self.method in TISSUE_METHODS: - tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number) - use_tissue_corr = True - - DatabaseFileName = self.getFileName( target_db_name=self.target_db_name ) - datasetFile = open(webqtlConfig.CACHEDIR+DatabaseFileName,'r') - - #XZ, 01/08/2009: read the first line - line = datasetFile.readline() - cached_sample_names = webqtlUtil.readLineCSV(line)[1:] - - #XZ, 01/08/2009: This step is critical. It is necessary for this new method. - #XZ: The original function fetchAllDatabaseData uses all strains stored in variable _strains to - #XZ: retrieve the values of each strain from database in real time. - #XZ: The new method uses all strains stored in variable dataset_strains to create a new variable - #XZ: _newvals. _newvals has the same length as dataset_strains. The items in _newvals is in - #XZ: the same order of items in dataset_strains. The value of each item in _newvals is either - #XZ: the value of correspinding strain in _vals or 'None'. - new_vals = [] - for name in cached_sample_names: - if name in self.sample_names: - new_vals.append(float(vals[self.sample_names.index(name)])) - else: - new_vals.append('None') - - nnCorr = len(new_vals) - - #XZ, 01/14/2009: If literature corr or tissue corr is selected, - #XZ: there is no need to use parallel computing. - - traits = [] - data_start = 1 - for line in datasetFile: - raw_trait = webqtlUtil.readLineCSV(line) - trait = Trait.from_csv(raw_trait, data_start) - trait.lit_corr = lit_corrs.get(trait.name) - trait.tissue_corr, trait.p_tissue = tissue_corrs.get(trait.name, (None, None)) - traits.append(trait) - - return traits, new_vals - - else: - traits = self.fetchAllDatabaseData(species=self.dataset.species, - GeneId=self.gene_id, - GeneSymbol=self.trait.symbol, - strains=self.sample_names, - db=self.db, - method=self.method, - returnNumber=self.returnNumber, - tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) - totalTraits = len(traits) #XZ, 09/18/2008: total trait number - - return traits - - def calculate_corr_for_all_tissues(self, tissue_dataset_id=None): - - symbol_corr_dict = {} - symbol_pvalue_dict = {} - - primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( - GeneNameLst=[self.this_trait.symbol], - TissueProbeSetFreezeId=tissue_dataset_id) - primary_trait_value = primary_trait_symbol_value_dict.values()[0] - - symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( - gene_name_list=[], - tissue_dataset_id=tissue_dataset_id) - - symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( - primaryTraitValue, - SymbolValueDict, - method=self.corr_method) - #else: - # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( - # primaryTraitValue, - # SymbolValueDict) - - return (symbolCorrDict, symbolPvalueDict) - - - def correlate(self): - self.correlation_data = collections.defaultdict(list) - for trait, values in self.target_dataset.trait_data.iteritems(): - values_1 = [] - values_2 = [] - for index,sample in enumerate(self.target_dataset.samplelist): - target_value = values[index] - if sample in self.sample_data.keys(): - this_value = self.sample_data[sample] - values_1.append(this_value) - values_2.append(target_value) - correlation = calCorrelation(values_1, values_2) - self.correlation_data[trait] = correlation - - def getFileName(self, target_db_name): ### dcrowell August 2008 - """Returns the name of the reference database file with which correlations are calculated. - Takes argument cursor which is a cursor object of any instance of a subclass of templatePage - Used by correlationPage""" - - dataset_id = str(self.target_dataset.id) - dataset_fullname = self.target_dataset.fullname.replace(' ','_') - dataset_fullname = dataset_fullname.replace('/','_') - - FileName = 'ProbeSetFreezeId_' + dataset_id + '_FullName_' + dataset_fullname + '.txt' - - return FileName - - def do_parallel_correlation(self, db_filename, num_overlap): - - #XZ, 01/14/2009: This method is for parallel computing only. - #XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1) - #XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected - def compute_corr(input_nnCorr, input_trait, input_list, corr_method): - - import math - import reaper - - def cmpOrder2(A,B): - try: - if A[-1] < B[-1]: - return -1 - elif A[-1] == B[-1]: - return 0 - else: - return 1 - except: - return 0 - - def calCorrelation(dbdata,userdata,N): - X = [] - Y = [] - for i in range(N): - if (dbdata[i] != None and userdata[i] != None) and (dbdata[i] != "None" and userdata[i] != "None"): - X.append(float(dbdata[i])) - Y.append(float(userdata[i])) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = reduce(lambda x,y:x+y,X,0.0) - sy = reduce(lambda x,y:x+y,Y,0.0) - meanx = sx/NN - meany = sy/NN - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - xyd += (X[i] - meanx)*(Y[i]-meany) - sxd += (X[i] - meanx)*(X[i] - meanx) - syd += (Y[i] - meany)*(Y[i] - meany) - try: - corr = xyd/(math.sqrt(sxd)*math.sqrt(syd)) - except: - corr = 0 - return (corr,NN) - - def calCorrelationRank(xVals,yVals,N): - """ - Calculated Spearman Ranked Correlation. The algorithm works - by setting all tied ranks to the average of those ranks (for - example, if ranks 5-10 all have the same value, each will be set - to rank 7.5). - """ - - XX = [] - YY = [] - j = 0 - - for i in range(len(xVals)): - if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"): - XX.append((j,float(xVals[i]))) - YY.append((j,float(yVals[i]))) - j = j+1 - - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - # allcorrelations = [] - - correlation_data = {} - for i, line in enumerate(input_list): - if i == 0: - continue - tokens = line.split('","') - tokens[-1] = tokens[-1][:-2] #remove the last " - tokens[0] = tokens[0][1:] #remove the first " - - traitdataName = tokens[0] - database_trait = tokens[1:] - - #print("database_trait:", database_trait) - - #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ - # if corr_method == 'pearson': - # sample_r, sample_p = scipy.stats.pearsonr(input_trait, database_trait) - # else: - # sample_r, sample_p = scipy.stats.spearmanr(input_trait, database_trait) - - if corr_method == "pearson": #XZ: Pearson's r - sample_r, nOverlap = calCorrelation(input_trait, database_trait, input_nnCorr) - else: #XZ: Spearman's rho - sample_r, nOverlap = calCorrelationRank(input_trait, database_trait, input_nnCorr) - - #XZ: calculate corrPValue - if nOverlap < 3: - sample_p = 1.0 - else: - if abs(sample_r) >= 1.0: - sample_p = 0.0 - else: - z_value = 0.5*math.log((1.0+sample_r)/(1.0-sample_r)) - z_value = z_value*math.sqrt(nOverlap-3) - sample_p = 2.0*(1.0 - reaper.normp(abs(z_value))) - - correlation_data[traitdataName] = [sample_r, sample_p, nOverlap] - - # traitinfo = [traitdataName, sample_r, nOverlap] - # allcorrelations.append(traitinfo) - - return correlation_data - # return allcorrelations - - - datasetFile = open(webqtlConfig.GENERATED_TEXT_DIR+db_filename,'r') - - print("Invoking parallel computing") - input_line_list = datasetFile.readlines() - print("Read lines from the file") - all_line_number = len(input_line_list) - - step = 1000 - job_number = math.ceil( float(all_line_number)/step ) - - print("JOB NUMBER", job_number) - - job_input_lists = [] - - print("Configuring jobs") - - for job_index in range( int(job_number) ): - starti = job_index*step - endi = min((job_index+1)*step, all_line_number) - - one_job_input_list = [] - - for i in range( starti, endi ): - one_job_input_list.append( input_line_list[i] ) - - job_input_lists.append( one_job_input_list ) - - print("Creating pp servers") - - ppservers = () - # Creates jobserver with automatically detected number of workers - job_server = pp.Server(ppservers=ppservers) - - print("Done creating servers") - - jobs = [] - results = [] - - print("Starting parallel computation, submitting jobs") - for one_job_input_list in job_input_lists: #pay attention to modules from outside - jobs.append( job_server.submit(func=compute_corr, args=(num_overlap, self.this_trait_vals, one_job_input_list, self.corr_method), depfuncs=(), modules=("webqtlUtil",)) ) - print("Done submitting jobs") - - for one_job in jobs: - one_result = one_job() - self.correlation_data.update(one_result) - # one_result = one_job() - # results.append( one_result ) - - #print("CORRELATION DATA:", self.correlation_data) - - # print("Acquiring results") - - # for one_result in results: - # for one_traitinfo in one_result: - # allcorrelations.append( one_traitinfo ) - def generate_corr_json(corr_results, this_trait, dataset, target_dataset): results_list = [] for i, trait in enumerate(corr_results): diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 077386a3..4bb4d65d 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -23,7 +23,6 @@ from __future__ import absolute_import, print_function, division import sys # sys.path.append(".") Never do this in a webserver! -import gc import string import cPickle import os diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index 9515d23a..6fda02fd 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -2,7 +2,6 @@ # Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> import sys from numpy import * -import scipy as sp # SciPy import rpy2.robjects as ro # R Objects import rpy2.rinterface as ri @@ -24,60 +23,38 @@ from utility import helper_functions from utility.tools import locate from rpy2.robjects.packages import importr -utils = importr("utils") + +import utility.logger +logger = utility.logger.getLogger(__name__ ) ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_options = ro.r["options"] # Map the options function -r_read_csv = ro.r["read.csv"] # Map the read.csv function -r_dim = ro.r["dim"] # Map the dim function -r_c = ro.r["c"] # Map the c function r_t = ro.r["t"] # Map the t function -r_cat = ro.r["cat"] # Map the cat function -r_paste = ro.r["paste"] # Map the paste function -r_unlist = ro.r["unlist"] # Map the unlist function -r_head = ro.r["head"] # Map the unlist function -r_unique = ro.r["unique"] # Map the unique function -r_length = ro.r["length"] # Map the length function r_unlist = ro.r["unlist"] # Map the unlist function r_list = ro.r.list # Map the list function -r_matrix = ro.r.matrix # Map the matrix function -r_seq = ro.r["seq"] # Map the seq function -r_table = ro.r["table"] # Map the table function -r_names = ro.r["names"] # Map the names function -r_sink = ro.r["sink"] # Map the sink function -r_is_NA = ro.r["is.na"] # Map the is.na function -r_file = ro.r["file"] # Map the file function r_png = ro.r["png"] # Map the png function for plotting r_dev_off = ro.r["dev.off"] # Map the dev.off function -r_save_image = ro.r["save.image"] # Map the save.image function -r_class = ro.r["class"] # Map the class function -r_save = ro.r["save"] # Map the save function r_write_table = ro.r["write.table"] # Map the write.table function -r_read_table = ro.r["read.table"] # Map the read.table function -r_as_data_frame = ro.r["as.data.frame"] # Map the write.table function r_data_frame = ro.r["data.frame"] # Map the write.table function r_as_numeric = ro.r["as.numeric"] # Map the write.table function class CTL(object): def __init__(self): - print("Initialization of CTL") + logger.info("Initialization of CTL") #log = r_file("/tmp/genenetwork_ctl.log", open = "wt") - #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file + #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file #r_sink(log, type = "message") - r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive + r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive r_options(stringsAsFactors = False) - print("Initialization of CTL done, package loaded in R session") + logger.info("Initialization of CTL done, package loaded in R session") self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function - self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function - self.r_CTLnetwork = ro.r["CTLnetwork"] # Map the CTLnetwork function - self.r_CTLprofiles = ro.r["CTLprofiles"] # Map the CTLprofiles function self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function self.nodes_list = [] self.edges_list = [] - print("Obtained pointers to CTL functions") + logger.info("Obtained pointers to CTL functions") def addNode(self, gt): node_dict = { 'data' : {'id' : str(gt.name) + ":" + str(gt.dataset.name), @@ -100,20 +77,20 @@ class CTL(object): self.edges_list.append(edge_dict) def run_analysis(self, requestform): - print("Starting CTL analysis on dataset") + logger.info("Starting CTL analysis on dataset") self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] self.trait_db_list = [x for x in self.trait_db_list if x] - print("strategy:", requestform.get("strategy")) + logger.debug("strategy:", requestform.get("strategy")) strategy = requestform.get("strategy") - print("nperm:", requestform.get("nperm")) + logger.debug("nperm:", requestform.get("nperm")) nperm = int(requestform.get("nperm")) - print("parametric:", requestform.get("parametric")) + logger.debug("parametric:", requestform.get("parametric")) parametric = bool(requestform.get("parametric")) - print("significance:", requestform.get("significance")) + logger.debug("significance:", requestform.get("significance")) significance = float(requestform.get("significance")) # Get the name of the .geno file belonging to the first phenotype @@ -123,7 +100,7 @@ class CTL(object): genofilelocation = locate(dataset.group.name + ".geno", "genotype") parser = genofile_parser.ConvertGenoFile(genofilelocation) parser.process_csv() - print(dataset.group) + logger.debug("dataset group: ", dataset.group) # Create a genotype matrix individuals = parser.individuals markers = [] @@ -133,14 +110,14 @@ class CTL(object): markers.append(marker["genotypes"]) genotypes = list(itertools.chain(*markers)) - print(len(genotypes) / len(individuals), "==", len(parser.markers)) + logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers)) rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True)) # Create a phenotype matrix traits = [] for trait in self.trait_db_list: - print("retrieving data for", trait) + logger.debug("retrieving data for", trait) if trait != "": ts = trait.split(':') gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1]) @@ -153,7 +130,7 @@ class CTL(object): rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True)) - print(rPheno) + logger.debug(rPheno) # Use a data frame to store the objects rPheno = r_data_frame(rPheno, check_names = False) @@ -196,10 +173,9 @@ class CTL(object): sys.stdout.flush() # Create the interactive graph for cytoscape visualization (Nodes and Edges) - print(type(significant)) if not type(significant) == ri.RNULLType: for x in range(len(significant[0])): - print(significant[0][x], significant[1][x], significant[2][x]) # Debug to console + logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console tsS = significant[0][x].split(':') # Source tsT = significant[2][x].split(':') # Target gtS = TRAIT.GeneralTrait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB @@ -214,7 +190,6 @@ class CTL(object): self.elements = json.dumps(self.nodes_list + self.edges_list) def loadImage(self, path, name): - print("pre-loading imgage results:", self.results[path]) imgfile = open(self.results[path], 'rb') imgdata = imgfile.read() imgB64 = imgdata.encode("base64") @@ -229,7 +204,7 @@ class CTL(object): n = n + 1 def process_results(self, results): - print("Processing CTL output") + logger.info("Processing CTL output") template_vars = {} template_vars["results"] = self.results template_vars["elements"] = self.elements diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 17625474..e2a0a479 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -79,38 +79,6 @@ class DoSearch(object): else: return None -class QuickMrnaAssaySearch(DoSearch): - """A general search for mRNA assays""" - - DoSearch.search_types['quick_mrna_assay'] = "QuickMrnaAssaySearch" - - base_query = """SELECT ProbeSet.Name as ProbeSet_Name, - ProbeSet.Symbol as ProbeSet_Symbol, - ProbeSet.description as ProbeSet_Description, - ProbeSet.Chr_num as ProbeSet_Chr_Num, - ProbeSet.Mb as ProbeSet_Mb, - ProbeSet.name_num as ProbeSet_name_num - FROM ProbeSet """ - - header_fields = ['Index', - 'Record', - 'Symbol', - 'Location'] - - def run(self): - """Generates and runs a search for assays across all mRNA expression datasets""" - - logger.debug("Running ProbeSetSearch") - query = self.base_query + """WHERE (MATCH (ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - ProbeSet.alias) - AGAINST ('%s' IN BOOLEAN MODE)) - """ % (escape(self.search_term[0])) - - return self.execute(query) - - class MrnaAssaySearch(DoSearch): """A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites""" @@ -311,54 +279,6 @@ class PhenotypeSearch(DoSearch): return self.execute(query) -class QuickPhenotypeSearch(PhenotypeSearch): - """A search across all phenotype datasets""" - - DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch" - - base_query = """SELECT Species.Name as Species_Name, - PublishFreeze.FullName as Dataset_Name, - PublishFreeze.Name, - PublishXRef.Id, - PublishFreeze.createtime as thistable, - Publication.PubMed_ID as Publication_PubMed_ID, - Phenotype.Post_publication_description as Phenotype_Name - FROM Phenotype, - PublishFreeze, - Publication, - PublishXRef, - InbredSet, - Species """ - - search_fields = ('Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors') - - def compile_final_query(self, where_clause = ''): - """Generates the final query string""" - - query = (self.base_query + - """WHERE %s - PublishXRef.PhenotypeId = Phenotype.Id and - PublishXRef.PublicationId = Publication.Id and - PublishXRef.InbredSetId = InbredSet.Id and - InbredSet.SpeciesId = Species.Id""" % where_clause) - - return query - - def run(self): - """Generates and runs a search across all phenotype datasets""" - - query = self.compile_final_query(where_clause = self.get_where_clause()) - - return self.execute(query) - class GenotypeSearch(DoSearch): """A search within a genotype dataset""" @@ -767,14 +687,6 @@ class MeanSearch(MrnaAssaySearch): return where_clause - def get_final_query(self): - self.where_clause = self.get_where_clause() - logger.debug("where_clause is:", pf(self.where_clause)) - - self.query = self.compile_final_query(where_clause = self.where_clause) - - return self.query - def run(self): self.where_clause = self.get_where_clause() logger.debug("where_clause is:", pf(self.where_clause)) @@ -948,7 +860,6 @@ if __name__ == "__main__": from base import webqtlConfig from base.data_set import create_dataset - from base.templatePage import templatePage from utility import webqtlUtil from db import webqtlDatabaseFunction diff --git a/wqflask/wqflask/export_traits.py b/wqflask/wqflask/export_traits.py index f8fce929..ab4c0d7c 100644 --- a/wqflask/wqflask/export_traits.py +++ b/wqflask/wqflask/export_traits.py @@ -1,6 +1,5 @@ from __future__ import print_function, division -import operator import csv import xlsxwriter import StringIO diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index 56ff11cd..af75d441 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -3,7 +3,6 @@ from __future__ import absolute_import, print_function, division import sys # sys.path.append(".") Never in a running webserver -import gc import string import cPickle import os @@ -16,7 +15,6 @@ import resource import scipy import numpy as np -from scipy import linalg from pprint import pformat as pf @@ -82,21 +80,16 @@ class Heatmap(object): this_trait = trait_db[0] this_sample_data = this_trait.data - #self.sample_data[this_trait.name] = [] this_trait_vals = [] for sample in self.all_sample_list: if sample in this_sample_data: this_trait_vals.append(this_sample_data[sample].value) - #self.sample_data[this_trait.name].append(this_sample_data[sample].value) else: this_trait_vals.append('') - #self.sample_data[this_trait.name].append('') self.sample_data.append(this_trait_vals) self.gen_reaper_results() - #self.gen_pylmm_results() - #chrnames = [] lodnames = [] chr_pos = [] pos = [] @@ -106,9 +99,6 @@ class Heatmap(object): lodnames.append(trait) for marker in self.dataset.group.markers.markers: - #if marker['chr'] not in chrnames: - # chr_ob = [marker['chr'], "filler"] - # chrnames.append(chr_ob) chr_pos.append(marker['chr']) pos.append(marker['Mb']) markernames.append(marker['name']) @@ -126,9 +116,6 @@ class Heatmap(object): json_data = self.json_data ) - print("self.js_data:", self.js_data) - - def gen_reaper_results(self): self.trait_results = {} for trait_db in self.trait_list: @@ -145,172 +132,14 @@ class Heatmap(object): trimmed_samples.append(samples[i]) trimmed_values.append(values[i]) - self.lrs_array = genotype.permutation(strains = trimmed_samples, - trait = trimmed_values, - nperm= self.num_permutations) - - #self.suggestive = self.lrs_array[int(self.num_permutations*0.37-1)] - #self.significant = self.lrs_array[int(self.num_permutations*0.95-1)] - reaper_results = genotype.regression(strains = trimmed_samples, trait = trimmed_values) - lrs_values = [float(qtl.lrs) for qtl in reaper_results] - print("lrs_values:", lrs_values) - #self.dataset.group.markers.add_pvalues(p_values) self.trait_results[this_trait.name] = [] for qtl in reaper_results: if qtl.additive > 0: self.trait_results[this_trait.name].append(-float(qtl.lrs)) else: - self.trait_results[this_trait.name].append(float(qtl.lrs)) - #for lrs in lrs_values: - # if - # self.trait_results[this_trait.name].append(lrs) - - - #this_db_samples = self.dataset.group.samplelist - #this_sample_data = this_trait.data - ##print("this_sample_data", this_sample_data) - #this_trait_vals = [] - #for index, sample in enumerate(this_db_samples): - # if sample in this_sample_data: - # sample_value = this_sample_data[sample].value - # this_trait_vals.append(sample_value) - # else: - # this_trait_vals.append("x") - - #pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals]) - - #key = "pylmm:input:" + str(self.temp_uuid) - #print("key is:", pf(key)) - - #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - #no_val_samples = self.identify_empty_samples(this_trait_vals) - #trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - #genotype_matrix = np.array(trimmed_genotype_data).T - - #print("genotype_matrix:", str(genotype_matrix.tolist())) - #print("pheno_vector:", str(pheno_vector.tolist())) - - #params = dict(pheno_vector = pheno_vector.tolist(), - # genotype_matrix = genotype_matrix.tolist(), - # restricted_max_likelihood = True, - # refit = False, - # temp_uuid = str(self.temp_uuid), - # - # # meta data - # timestamp = datetime.datetime.now().isoformat(), - # ) - # - #json_params = json.dumps(params) - ##print("json_params:", json_params) - #Redis.set(key, json_params) - #Redis.expire(key, 60*60) - #print("before printing command") - # - #command = 'python lmm.py --key {} --species {}'.format(key, - # "other") - #print("command is:", command) - #print("after printing command") - # - #os.system(command) - # - #json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60) - - def gen_pylmm_results(self): - # This function is NOT used. If it is, we should use a shared function with marker_regression.py - self.trait_results = {} - for trait_db in self.trait_list: - this_trait = trait_db[0] - #this_db = trait_db[1] - self.dataset.group.get_markers() - - this_db_samples = self.dataset.group.samplelist - this_sample_data = this_trait.data - #print("this_sample_data", this_sample_data) - this_trait_vals = [] - for index, sample in enumerate(this_db_samples): - if sample in this_sample_data: - sample_value = this_sample_data[sample].value - this_trait_vals.append(sample_value) - else: - this_trait_vals.append("x") - - pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals]) - - key = "pylmm:input:" + str(self.temp_uuid) - #print("key is:", pf(key)) - - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - no_val_samples = self.identify_empty_samples(this_trait_vals) - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - genotype_matrix = np.array(trimmed_genotype_data).T - - #print("genotype_matrix:", str(genotype_matrix.tolist())) - #print("pheno_vector:", str(pheno_vector.tolist())) - - params = dict(pheno_vector = pheno_vector.tolist(), - genotype_matrix = genotype_matrix.tolist(), - restricted_max_likelihood = True, - refit = False, - temp_uuid = str(self.temp_uuid), - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - json_params = json.dumps(params) - #print("json_params:", json_params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - print("before printing command") - - command = PYLMM_COMMAND+' --key {} --species {}'.format(key, - "other") - print("command is:", command) - print("after printing command") - - os.system(command) - - json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60) - results = json.loads(json_results[1]) - p_values = [float(result) for result in results['p_values']] - #print("p_values:", p_values) - self.dataset.group.markers.add_pvalues(p_values) - - self.trait_results[this_trait.name] = [] - for marker in self.dataset.group.markers.markers: - self.trait_results[this_trait.name].append(marker['lod_score']) - - - def identify_empty_samples(self, values): - no_val_samples = [] - for sample_count, val in enumerate(values): - if val == "x": - no_val_samples.append(sample_count) - return no_val_samples - - def trim_genotypes(self, genotype_data, no_value_samples): - trimmed_genotype_data = [] - for marker in genotype_data: - new_genotypes = [] - for item_count, genotype in enumerate(marker): - if item_count in no_value_samples: - continue - try: - genotype = float(genotype) - except ValueError: - genotype = np.nan - pass - new_genotypes.append(genotype) - trimmed_genotype_data.append(new_genotypes) - return trimmed_genotype_data - - + self.trait_results[this_trait.name].append(float(qtl.lrs))
\ No newline at end of file diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index fda7773f..2c60dd70 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -6,7 +6,7 @@ from flask import Flask, g #Just return a list of dictionaries #each dictionary contains sub-dictionary -def loadGenes(chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse'): +def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] diff --git a/wqflask/wqflask/marker_regression/MarkerRegressionPage.py b/wqflask/wqflask/marker_regression/MarkerRegressionPage.py deleted file mode 100644 index deb47ad0..00000000 --- a/wqflask/wqflask/marker_regression/MarkerRegressionPage.py +++ /dev/null @@ -1,1648 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -import time -import string -import math -from math import * -import piddle as pid -import sys,os -import httplib, urllib - -from htmlgen import HTMLgen2 as HT -from utility import Plot -from intervalAnalyst import GeneUtil -from base.webqtlTrait import webqtlTrait -from base.templatePage import templatePage -from utility import webqtlUtil -from base import webqtlConfig -from db import webqtlDatabaseFunction -from base.GeneralObject import GeneralObject - -import reaper -import cPickle -from utility.THCell import THCell -from utility.TDCell import TDCell - -class MarkerRegressionPage(templatePage): - - def __init__(self, fd): - - templatePage.__init__(self, fd) - - if not self.openMysql(): - return - - self.initializeParameters(fd) - - filename= webqtlUtil.genRandStr("Itvl_") - ChrList,ChrNameOrderIdDict,ChrOrderIdNameDict,ChrLengthMbList= self.getChrNameOrderIdLength(RISet=fd.RISet) - - if self.mappingMethodId == '4': # For PLINK - - traitInfoList = string.split(string.strip(fd.identification),':') - probesetName = string.strip(traitInfoList[-1]) - plinkOutputFileName= webqtlUtil.genRandStr("%s_%s_"%(fd.RISet,probesetName)) - - # get related values from fd.allTraitData; the format of 'allTraitValueDict'is {strainName1: value=-0.2...} - fd.readData() - allTraitValueDict = fd.allTraitData - - #automatically generate pheno txt file for PLINK - self.genPhenoTxtFileForPlink(phenoFileName=plinkOutputFileName,RISetName=fd.RISet,probesetName=probesetName, valueDict=allTraitValueDict) - # os.system full path is required for input and output files; specify missing value is -9999 - plink_command = '%splink/plink --noweb --ped %splink/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %splink/%s.map --pheno %s/%s.txt --pheno-name %s --missing-phenotype -9999 --out %s%s --assoc ' % (webqtlConfig.GENODIR, webqtlConfig.GENODIR, fd.RISet, webqtlConfig.GENODIR, fd.RISet, webqtlConfig.TMPDIR, plinkOutputFileName, probesetName, webqtlConfig.TMPDIR, plinkOutputFileName) - - os.system(plink_command) - - if fd.identification: - heading2 = HT.Paragraph('Trait ID: %s' % fd.identification) - heading2.__setattr__("class","subtitle") - self.dict['title'] = '%s: Genome Association' % fd.identification - else: - heading2 = "" - self.dict['title'] = 'Genome Association' - - if fd.traitInfo: - symbol,chromosome,MB = string.split(fd.traitInfo,'\t') - heading3 = HT.Paragraph('[ ',HT.Strong(HT.Italic('%s' % symbol,id="green")),' on Chr %s @ %s Mb ]' % (chromosome,MB)) - else: - heading3 = "" - - heading = HT.Paragraph('Trait Data Entered for %s Set' % fd.RISet) - heading.__setattr__("class","title") - - # header info part:Trait Data Entered for HLC Set & Trait ID: - headerdiv = HT.TR(HT.TD(heading, heading2,heading3, width='45%',valign='top', align='left', bgColor='#eeeeee')) - - self.ChrList=ChrList # get chr name from '1' to 'X' - self.ChrLengthMbList = ChrLengthMbList - - # build plink result dict based on chr, key is chr name, value is in list type including Snpname, bp and pvalue info - plinkResultDict={} - count,minPvalue,plinkResultDict =self.getPlinkResultDict(outputFileName=plinkOutputFileName,thresholdPvalue=self.pValue,ChrOrderIdNameDict=ChrOrderIdNameDict) - - # if can not find results which are matched with assigned p-value, system info will show up - if count >0: - - #for genome association report table - reportTable="" - # sortable table object - resultstable,tblobj,bottomInfo = self.GenReportForPLINK(ChrNameOrderIdDict=ChrNameOrderIdDict, RISet=fd.RISet,plinkResultDict=plinkResultDict,thresholdPvalue=self.pValue,chrList=self.ChrList) - - # creat object for result table for sort function - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - - sortby = ("Index", "up") - reportTable =HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "0"), Id="sortable") - - descriptionTable = HT.TableLite(border=0, cellpadding=0, cellspacing=0) - descriptionTable.append(HT.TR(HT.TD(reportTable, colspan=3))) - descriptionTable.append(HT.TR(HT.TD(HT.BR(),HT.BR()))) - descriptionTable.append(bottomInfo) - - # get each chr's length - self.ChrLengthMbList = map(lambda x: x/1000000.0, self.ChrLengthMbList) # change unit from bp to mb - self.ChrLengthMbSum = reduce(lambda x, y:x+y, self.ChrLengthMbList, 0.0)# get total length of all chrs - if self.ChrLengthMbList: - self.GraphInterval = self.ChrLengthMbSum/(len(self.ChrLengthMbList)*12) #Empirical Mb interval - else: - self.GraphInterval = 1 - - # for human data, there's no CM value - self.ChrLengthCMList = [] - self.ChrLengthCMSum = 0 - - # begin: common part with human data - intCanvas = pid.PILCanvas(size=(self.graphWidth,self.graphHeight)) - gifmap = self.plotIntMappingForPLINK(fd, intCanvas, startMb = self.startMb, endMb = self.endMb, plinkResultDict=plinkResultDict) - - intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, filename), format='png') - intImg=HT.Image('/image/'+filename+'.png', border=0, usemap='#WebQTLImageMap') - - TD_LR = HT.TR(HT.TD(HT.Blockquote(gifmap,intImg, HT.P()), bgColor='#eeeeee', height = 200)) - self.dict['body'] = str(headerdiv)+str(TD_LR)+str(resultstable)+str(HT.TR(HT.TD(descriptionTable))) - - else: - heading = "Genome Association" - detail = ['There is no association with marker that meets this criteria. Please provide a less stringend threshold. The minimun p-value is %s.'%minPvalue] - self.error(heading=heading,detail=detail) - return - - elif self.mappingMethodId == '1': # QTLreaper result - if not fd.genotype: - fd.readData() - - fd.parentsf14regression = fd.formdata.getvalue('parentsf14regression') - weightedRegression = fd.formdata.getvalue('applyVarianceSE') - - if fd.parentsf14regression and fd.genotype_2: - _genotype = fd.genotype_2 - else: - _genotype = fd.genotype_1 - - _strains, _vals, _vars, N = fd.informativeStrains(_genotype.prgy, weightedRegression) - - if fd.identification: - heading2 = HT.Paragraph('Trait ID: %s' % fd.identification) - heading2.__setattr__("class","subtitle") - self.dict['title'] = '%s: Genome Association' % fd.identification - else: - heading2 = "" - self.dict['title'] = 'Genome Association' - - if fd.traitInfo: - symbol,chromosome,MB = string.split(fd.traitInfo,'\t') - heading3 = HT.Paragraph('[ ',HT.Strong(HT.Italic('%s' % symbol,id="green")),' on Chr %s @ %s Mb ]' % (chromosome,MB)) - else: - heading3 = "" - - if N < webqtlConfig.KMININFORMATIVE: - heading = "Genome Association" - detail = ['Fewer than %d strain data were entered for %s data set. No mapping attempted.' % (webqtlConfig.KMININFORMATIVE, fd.RISet)] - self.error(heading=heading,detail=detail) - return - else: - heading = HT.Paragraph('Trait Data Entered for %s Set' % fd.RISet) - heading.__setattr__("class","title") - - datadiv = HT.TD(heading, heading2,heading3, width='45%',valign='top', align='left', bgColor='#eeeeee') - resultstable,tblobj,bottomInfo = self.GenReport(ChrNameOrderIdDict,fd, _genotype, _strains, _vals, _vars) - #resultstable = self.GenReport(fd, _genotype, _strains, _vals, _vars) - - # creat object for result table for sort function - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - - sortby = ("Index", "up") - reportTable =HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "0"), Id="sortable") - - descriptionTable = HT.TableLite(border=0, cellpadding=0, cellspacing=0) - descriptionTable.append(HT.TR(HT.TD(reportTable, colspan=3))) - descriptionTable.append(HT.TR(HT.TD(HT.BR(),HT.BR()))) - descriptionTable.append(bottomInfo) - - self.traitList=_vals - - ##########################plot####################### - - ################################################################ - # Generate Chr list and Retrieve Length Information - ################################################################ - self.genotype= _genotype - self.ChrList = [("All", -1)] - - for i, indChr in enumerate(self.genotype): - self.ChrList.append((indChr.name, i)) - - self.cursor.execute(""" - Select - Length from Chr_Length, InbredSet - where - Chr_Length.SpeciesId = InbredSet.SpeciesId AND - InbredSet.Name = '%s' AND - Chr_Length.Name in (%s) - Order by - OrderId - """ % (fd.RISet, string.join(map(lambda X: "'%s'" % X[0], self.ChrList[1:]), ", "))) - - self.ChrLengthMbList = self.cursor.fetchall() - self.ChrLengthMbList = map(lambda x: x[0]/1000000.0, self.ChrLengthMbList) - self.ChrLengthMbSum = reduce(lambda x, y:x+y, self.ChrLengthMbList, 0.0) - if self.ChrLengthMbList: - self.MbGraphInterval = self.ChrLengthMbSum/(len(self.ChrLengthMbList)*12) #Empirical Mb interval - else: - self.MbGraphInterval = 1 - - self.ChrLengthCMList = [] - for i, _chr in enumerate(self.genotype): - self.ChrLengthCMList.append(_chr[-1].cM - _chr[0].cM) - self.ChrLengthCMSum = reduce(lambda x, y:x+y, self.ChrLengthCMList, 0.0)# used for calculate plot scale - - self.GraphInterval = self.MbGraphInterval #Mb - - # begin: common part with human data - intCanvas = pid.PILCanvas(size=(self.graphWidth,self.graphHeight)) - gifmap = self.plotIntMapping(fd, intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= "") - filename= webqtlUtil.genRandStr("Itvl_") - intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, filename), format='png') - intImg=HT.Image('/image/'+filename+'.png', border=0, usemap='#WebQTLImageMap') - - ################################################################ - # footnote goes here - ################################################################ - btminfo = HT.Paragraph(Id="smallsize") #Small('More information about this graph is available here.') - - if (self.additiveChecked): - btminfo.append(HT.BR(), 'A positive additive coefficient (', HT.Font('green', color='green'), ' line) indicates that %s alleles increase trait values. In contrast, a negative additive coefficient (' % fd.ppolar, HT.Font('red', color='red'), ' line) indicates that %s alleles increase trait values.' % fd.mpolar) - - - TD_LR = HT.TR(HT.TD(HT.Blockquote(gifmap,intImg, HT.P()), bgColor='#eeeeee', height = 200)) - - self.dict['body'] = str(datadiv)+str(TD_LR)+str(resultstable)+str(HT.TR(HT.TD(descriptionTable))) - - # end: common part with human data - - else: - pass - - - # add by NL 10-2-2011 - def initializeParameters(self, fd): - """ - Initializes all of the MarkerRegressionPage class parameters, - acquiring most values from the formdata (fd) - """ - ################################### - # manhattam plot parameters - ################################### - - self.graphHeight = 600 - self.graphWidth = 1280 - self.plotScale = 'physic' - self.selectedChr = -1 - self.GRAPH_BACK_DARK_COLOR = pid.HexColor(0xF1F1F9) - self.GRAPH_BACK_LIGHT_COLOR = pid.HexColor(0xFBFBFF) - self.LRS_COLOR = pid.HexColor(0x0000FF) - self.LRS_LOD ='LRS' - self.lrsMax = float(fd.formdata.getvalue('lrsMax', 0)) - self.startMb = fd.formdata.getvalue('startMb', "-1") - self.endMb = fd.formdata.getvalue('endMb', "-1") - self.mappingMethodId = fd.formdata.getvalue('mappingMethodId', "0") - self.permChecked=True - self.multipleInterval=False - self.SIGNIFICANT_WIDTH = 5 - self.SUGGESTIVE_WIDTH = 5 - self.SIGNIFICANT_COLOR = pid.HexColor(0xEBC7C7) - self.SUGGESTIVE_COLOR = pid.gainsboro - self.colorCollection = [self.LRS_COLOR] - self.additiveChecked= True - self.ADDITIVE_COLOR_POSITIVE = pid.green - self.legendChecked =False - self.pValue=float(fd.formdata.getvalue('pValue',-1)) - - # allow user to input p-value greater than 1, - # in this case, the value will be treated as -lgP value. so the input value needs to be transferred to power of 10 format - if self.pValue >1: - self.pValue =10**-(self.pValue) - - try: - self.startMb = float(self.startMb) - self.endMb = float(self.endMb) - if self.startMb > self.endMb: - temp = self.startMb - self.startMb = self.endMb - self.endMb = temp - #minimal distance 10bp - if self.endMb - self.startMb < 0.00001: - self.endMb = self.startMb + 0.00001 - except: - self.startMb = self.endMb = -1 - - def GenReportForPLINK(self, ChrNameOrderIdDict={},RISet='',plinkResultDict= {},thresholdPvalue=-1,chrList=[]): - - 'Create an HTML division which reports any loci which are significantly associated with the submitted trait data.' - ######################################### - # Genome Association report - ######################################### - locusFormName = webqtlUtil.genRandStr("fm_") - locusForm = HT.Form(cgi = os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), \ - enctype='multipart/form-data', name=locusFormName, submit=HT.Input(type='hidden')) - hddn = {'FormID':'showDatabase','ProbeSetID':'_','database':RISet+"Geno",'CellID':'_', \ - 'RISet':RISet, 'incparentsf1':'on'} - for key in hddn.keys(): - locusForm.append(HT.Input(name=key, value=hddn[key], type='hidden')) - - regressionHeading = HT.Paragraph('Genome Association Report') - regressionHeading.__setattr__("class","title") - - filename= webqtlUtil.genRandStr("GenomeAsscociation_") - fpText = open('%s.txt' % (webqtlConfig.TMPDIR+filename), 'wb') - fpText.write('The loci meet the criteria of P-Value <= %3.6f.\n'%thresholdPvalue) - pValueInfo =HT.Paragraph('The loci meet the criteria of P-Value <= %3.6f.\n'%thresholdPvalue) - - textUrl = HT.Href(text = 'Download', url= '/tmp/'+filename+'.txt', target = "_blank", Class='fs12 fwn') - bottomInfo = HT.TR(HT.TD(HT.Paragraph(textUrl, ' result in tab-delimited text format.', HT.BR(), HT.BR(),Class="fs12 fwn"), colspan=3)) - - tblobj={} # build dict for genTableObj function; keys include header and body - tblobj_header = [] # value of key 'header' - tblobj_body=[] # value of key 'body' - reportHeaderRow=[] # header row list for tblobj_header (html part) - headerList=['Index','SNP Name','Chr','Mb','-log(P)'] - headerStyle="fs14 fwb ffl b1 cw cbrb" # style of the header - cellColorStyle = "fs13 b1 fwn c222" # style of the cells - - if headerList: - for ncol, item in enumerate(headerList): - reportHeaderRow.append(THCell(HT.TD(item, Class=headerStyle, valign='bottom',nowrap='ON'),text=item, idx=ncol)) - #download file for table headers' names - fpText.write('SNP_Name\tChromosome\tMb\t-log(P)\n') - - tblobj_header.append(reportHeaderRow) - tblobj['header']=tblobj_header - - index=1 - for chr in chrList: - - if plinkResultDict.has_key(chr): - if chr in ChrNameOrderIdDict.keys(): - chrOrderId =ChrNameOrderIdDict[chr] - else: - chrOrderId=chr - - valueList=plinkResultDict[chr] - - for value in valueList: - reportBodyRow=[] # row list for tblobj_body (html part) - snpName=value[0] - bp=value[1] - mb=int(bp)/1000000.0 - - try: - pValue =float(value[2]) - except: - pValue =1 - formattedPvalue = -math.log10(pValue) - - formattedPvalue = webqtlUtil.SciFloat(formattedPvalue) - dbSnprs=snpName.replace('rs','') - SnpHref = HT.Href(text=snpName, url="http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=%s"%dbSnprs, target="_blank") - - selectCheck=HT.Input(type="checkbox", Class="checkbox", name="index",value=index, onClick="highlight(this)") - reportBodyRow.append(TDCell(HT.TD(str(index),selectCheck, align='right',Class=cellColorStyle,nowrap='ON'),str(index),index)) - reportBodyRow.append(TDCell(HT.TD(SnpHref, Class=cellColorStyle,nowrap='ON'),snpName, snpName)) - reportBodyRow.append(TDCell(HT.TD(chr, Class=cellColorStyle, align="center",nowrap='ON'),chr, chrOrderId)) - reportBodyRow.append(TDCell(HT.TD('%3.6f'%mb, Class=cellColorStyle, align="center",nowrap='ON'),mb, mb)) - reportBodyRow.append(TDCell(HT.TD(formattedPvalue, Class=cellColorStyle, align="center",nowrap='ON'),formattedPvalue, float(formattedPvalue))) - - fpText.write('%s\t%s\t%3.6f\t%s\n' % (snpName, str(chr), mb, formattedPvalue)) - index+=1 - - tblobj_body.append(reportBodyRow) - - tblobj['body']=tblobj_body - rv=HT.TR(HT.TD(regressionHeading,pValueInfo, locusForm, HT.P(), width='55%',valign='top', align='left',bgColor='#eeeeee')) - - return rv, tblobj,bottomInfo - - - def GenReport(self, ChrNameOrderIdDict,fd, _genotype, _strains, _vals, _vars= []): - 'Create an HTML division which reports any loci which are significantly associated with the submitted trait data.' - #calculate QTL for each trait - self.qtlresults = [] - if webqtlUtil.ListNotNull(_vars): - qtlresults = _genotype.regression(strains = _strains, trait = _vals, variance = _vars) - LRSArray = _genotype.permutation(strains = _strains, trait = _vals, variance = _vars, nperm=fd.nperm) - else: - qtlresults = _genotype.regression(strains = _strains, trait = _vals) - LRSArray = _genotype.permutation(strains = _strains, trait = _vals,nperm=fd.nperm) - - self.qtlresults.append(qtlresults) - - filename= webqtlUtil.genRandStr("GenomeAsscociation_") - - # set suggestive, significant and highly significant LRS - if fd.suggestive == None: - fd.suggestive = LRSArray[int(fd.nperm*0.37-1)] - else: - fd.suggestive = float(fd.suggestive) - if fd.significance == None: - fd.significance = LRSArray[int(fd.nperm*0.95-1)] - else: - fd.significance = float(fd.significance) - - self.significance =fd.significance - self.suggestive = fd.suggestive - self.highlysignificant = LRSArray[int(fd.nperm*0.99-1)] - _dispAllLRS = 0 - if fd.formdata.getvalue('displayAllLRS'): - _dispAllLRS = 1 - qtlresults2 = [] - if _dispAllLRS: - filtered = qtlresults[:] - else: - filtered = filter(lambda x, y=fd.suggestive: x.lrs > y, qtlresults) - if len(filtered) == 0: - qtlresults2 = qtlresults[:] - qtlresults2.sort() - filtered = qtlresults2[-10:] - - ######################################### - # Permutation Graph - ######################################### - myCanvas = pid.PILCanvas(size=(400,300)) - #plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification) - Plot.plotBar(myCanvas, LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test') - filename= webqtlUtil.genRandStr("Reg_") - myCanvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test') - - if fd.suggestive == None: - fd.suggestive = LRSArray[int(fd.nperm*0.37-1)] - else: - fd.suggestive = float(fd.suggestive) - if fd.significance == None: - fd.significance = LRSArray[int(fd.nperm*0.95-1)] - else: - fd.significance = float(fd.significance) - - permutationHeading = HT.Paragraph('Histogram of Permutation Test') - permutationHeading.__setattr__("class","title") - - permutation = HT.TableLite() - permutation.append(HT.TR(HT.TD(img))) - - - ######################################### - # Genome Association report - ######################################### - locusFormName = webqtlUtil.genRandStr("fm_") - locusForm = HT.Form(cgi = os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), \ - enctype='multipart/form-data', name=locusFormName, submit=HT.Input(type='hidden')) - hddn = {'FormID':'showDatabase','ProbeSetID':'_','database':fd.RISet+"Geno",'CellID':'_', \ - 'RISet':fd.RISet, 'incparentsf1':'on'} - for key in hddn.keys(): - locusForm.append(HT.Input(name=key, value=hddn[key], type='hidden')) - - regressionHeading = HT.Paragraph('Genome Association Report') - regressionHeading.__setattr__("class","title") - # report is the info part above report table - if qtlresults2 != []: - report = HT.Blockquote(HT.Font('No association ',color="#FF0000"),HT.Font('with a likelihood ratio statistic greater than %3.1f was found. Here are the top 10 LRSs.' % fd.suggestive,color="#000000")) - else: - report = HT.Blockquote('The following loci in the %s data set have associations with the above trait data.\n' % fd.RISet, HT.P()) - report.__setattr__("class","normalsize") - - fpText = open('%s.txt' % (webqtlConfig.TMPDIR+filename), 'wb') - fpText.write('Suggestive LRS =%3.2f\n'%self.suggestive) - fpText.write('Significant LRS =%3.2f\n'%self.significance) - fpText.write('Highly Significant LRS =%3.2f\n'%self.highlysignificant) - LRSInfo =HT.Paragraph(' Suggestive LRS =%3.2f\n'%fd.suggestive, HT.BR(), ' Significant LRS =%3.2f\n'%fd.significance,HT.BR(),' Highly Significant LRS =%3.2f\n' % self.highlysignificant) - - textUrl = HT.Href(text = 'Download', url= '/tmp/'+filename+'.txt', target = "_blank", Class='fs12 fwn') - - bottomInfo = HT.TR(HT.TD(HT.Paragraph(textUrl, ' result in tab-delimited text format.', HT.BR(), HT.BR(),'LRS values marked with',HT.Font(' * ',color="red"), 'are greater than the significance threshold (specified by you or by permutation test). ' , HT.BR(), HT.BR(), HT.Strong('Additive Effect'), ' is half the difference in the mean phenotype of all cases that are homozygous for one parental allel at this marker minus the mean of all cases that are homozygous for the other parental allele at this marker. ','In the case of %s strains, for example,' % fd.RISet,' A positive additive effect indicates that %s alleles increase trait values. Negative additive effect indicates that %s alleles increase trait values.'% (fd.ppolar,fd.mpolar),Class="fs12 fwn"))) - - tblobj={} # build dict for genTableObj function; keys include header and body - tblobj_header = [] # value of key 'header' - tblobj_body=[] # value of key 'body' - reportHeaderRow=[] # header row list for tblobj_header (html part) - headerStyle="fs14 fwb ffl b1 cw cbrb" # style of the header - cellColorStyle = "fs13 b1 fwn c222" # style of the cells - - headerList=['Index','LRS','Chr','Mb','Locus','Additive Effect'] - for ncol, item in enumerate(headerList): - reportHeaderRow.append(THCell(HT.TD(item, Class=headerStyle, valign='bottom',nowrap='ON'),text=item, idx=ncol)) - - if fd.genotype.type == 'intercross': - ncol =len(headerList) - reportHeaderRow.append(THCell(HT.TD('Dominance Effect', Class=headerStyle, valign='bottom',nowrap='ON'),text='Dominance Effect', idx=ncol)) - - #download file for table headers' names - fpText.write('LRS\tChromosome\tMb\tLocus\tAdditive Effect\tDominance Effect\n') - - index=1 - for ii in filtered: - #add by NL 06-20-2011: set LRS to 460 when LRS is infinite, - if ii.lrs==float('inf') or ii.lrs>webqtlConfig.MAXLRS: - LRS=webqtlConfig.MAXLRS #maximum LRS value - else: - LRS=ii.lrs - - if LRS > fd.significance: - lrs = HT.TD(HT.Font('%3.3f*' % LRS, color='#FF0000'),Class=cellColorStyle) - else: - lrs = HT.TD('%3.3f' % LRS,Class=cellColorStyle) - - if ii.locus.chr in ChrNameOrderIdDict.keys(): - chrOrderId =ChrNameOrderIdDict[ii.locus.chr] - else: - chrOrderId=ii.locus.chr - - reportBodyRow=[] # row list for tblobj_body (html part) - selectCheck=HT.Input(type="checkbox", Class="checkbox", name="index",value=index, onClick="highlight(this)") - reportBodyRow.append(TDCell(HT.TD(str(index),selectCheck, align='right',Class=cellColorStyle,nowrap='ON'),str(index),index)) - reportBodyRow.append(TDCell(lrs,LRS, LRS)) - reportBodyRow.append(TDCell(HT.TD(ii.locus.chr, Class=cellColorStyle, align="center",nowrap='ON'),ii.locus.chr, chrOrderId)) - reportBodyRow.append(TDCell(HT.TD('%3.6f'%ii.locus.Mb, Class=cellColorStyle, align="center",nowrap='ON'),ii.locus.Mb, ii.locus.Mb)) - reportBodyRow.append(TDCell(HT.TD(HT.Href(text=ii.locus.name, url = "javascript:showTrait('%s','%s');" % (locusFormName, ii.locus.name), Class='normalsize'), Class=cellColorStyle, align="center",nowrap='ON'),ii.locus.name, ii.locus.name)) - reportBodyRow.append(TDCell(HT.TD('%3.3f' % ii.additive, Class=cellColorStyle, align="center",nowrap='ON'),ii.additive, ii.additive)) - reportBodyRow.append(TDCell(HT.TD('%3.3f' % ii.dominance, Class=cellColorStyle, align="center",nowrap='ON'),ii.dominance, ii.dominance)) - - fpText.write('%2.3f\t%s\t%3.6f\t%s\t%2.3f\t%2.3f\n' % (LRS, ii.locus.chr, ii.locus.Mb, ii.locus.name, ii.additive, ii.dominance)) - index+=1 - tblobj_body.append(reportBodyRow) - else: - #download file for table headers' names - fpText.write('LRS\tChromosome\tMb\tLocus\tAdditive Effect\n') - - index=1 - for ii in filtered: - #add by NL 06-20-2011: set LRS to 460 when LRS is infinite, - if ii.lrs==float('inf') or ii.lrs>webqtlConfig.MAXLRS: - LRS=webqtlConfig.MAXLRS #maximum LRS value - else: - LRS=ii.lrs - - if LRS > fd.significance: - lrs = HT.TD(HT.Font('%3.3f*' % LRS, color='#FF0000'),Class=cellColorStyle) - else: - lrs = HT.TD('%3.3f' % LRS,Class=cellColorStyle) - - if ii.locus.chr in ChrNameOrderIdDict.keys(): - chrOrderId =ChrNameOrderIdDict[ii.locus.chr] - else: - chrOrderId=ii.locus.chr - - reportBodyRow=[] # row list for tblobj_body (html part) - selectCheck=HT.Input(type="checkbox", Class="checkbox", name="index",value=index, onClick="highlight(this)") - reportBodyRow.append(TDCell(HT.TD(str(index),selectCheck, align='right',Class=cellColorStyle,nowrap='ON'),str(index),index)) - reportBodyRow.append(TDCell(lrs,LRS, LRS)) - reportBodyRow.append(TDCell(HT.TD(ii.locus.chr, Class=cellColorStyle, align="center",nowrap='ON'),ii.locus.chr, chrOrderId)) - reportBodyRow.append(TDCell(HT.TD('%3.6f'%ii.locus.Mb, Class=cellColorStyle, align="center",nowrap='ON'),ii.locus.Mb, ii.locus.Mb)) - reportBodyRow.append(TDCell(HT.TD(HT.Href(text=ii.locus.name, url = "javascript:showTrait('%s','%s');" % (locusFormName, ii.locus.name), Class='normalsize'), Class=cellColorStyle, align="center",nowrap='ON'),ii.locus.name, ii.locus.name)) - reportBodyRow.append(TDCell(HT.TD('%3.3f' % ii.additive, Class=cellColorStyle, align="center",nowrap='ON'),ii.additive, ii.additive)) - - fpText.write('%2.3f\t%s\t%3.6f\t%s\t%2.3f\n' % (LRS, ii.locus.chr, ii.locus.Mb, ii.locus.name, ii.additive)) - index+=1 - tblobj_body.append(reportBodyRow) - - tblobj_header.append(reportHeaderRow) - tblobj['header']=tblobj_header - tblobj['body']=tblobj_body - - rv=HT.TD(regressionHeading,LRSInfo,report, locusForm, HT.P(),width='55%',valign='top', align='left', bgColor='#eeeeee') - if fd.genotype.type == 'intercross': - bottomInfo.append(HT.BR(), HT.BR(), HT.Strong('Dominance Effect'),' is the difference between the mean trait value of cases heterozygous at a marker and the average mean for the two groups homozygous at this marker: e.g., BD - (BB+DD)/2]. A positive dominance effect indicates that the average phenotype of BD heterozygotes exceeds the mean of BB and DD homozygotes. No dominance deviation can be computed for a set of recombinant inbred strains or for a backcross.') - return rv,tblobj,bottomInfo - - return rv,tblobj,bottomInfo - - def plotIntMappingForPLINK(self, fd, canvas, offset= (80, 120, 20, 80), zoom = 1, startMb = None, endMb = None, showLocusForm = "",plinkResultDict={}): - #calculating margins - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - xLeftOffset = int(xLeftOffset*fontZoom) - xRightOffset = int(xRightOffset*fontZoom) - yBottomOffset = int(yBottomOffset*fontZoom) - - cWidth = canvas.size[0] - cHeight = canvas.size[1] - plotWidth = cWidth - xLeftOffset - xRightOffset - plotHeight = cHeight - yTopOffset - yBottomOffset - startPixelX = xLeftOffset - endPixelX = (xLeftOffset + plotWidth) - - #Drawing Area Height - drawAreaHeight = plotHeight - if self.plotScale == 'physic' and self.selectedChr > -1: # for single chr - drawAreaHeight -= self.ENSEMBL_BAND_HEIGHT + self.UCSC_BAND_HEIGHT+ self.WEBQTL_BAND_HEIGHT + 3*self.BAND_SPACING+ 10*zoom - if self.geneChecked: - drawAreaHeight -= self.NUM_GENE_ROWS*self.EACH_GENE_HEIGHT + 3*self.BAND_SPACING + 10*zoom - else: - if self.selectedChr > -1: - drawAreaHeight -= 20 - else:# for all chrs - drawAreaHeight -= 30 - - #Image map - gifmap = HT.Map(name='WebQTLImageMap') - - newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) - # Draw the alternating-color background first and get plotXScale - plotXScale = self.drawGraphBackgroundForPLINK(canvas, gifmap, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb,plinkResultDict=plinkResultDict) - - # Draw X axis - self.drawXAxisForPLINK(fd, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) - # Draw manhattam plot - self.drawManhattanPlotForPLINK(canvas, drawAreaHeight, gifmap, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb,plinkResultDict=plinkResultDict,thresholdPvalue=self.pValue) - - return gifmap - - - def plotIntMapping(self, fd, canvas, offset= (80, 120, 20, 80), zoom = 1, startMb = None, endMb = None, showLocusForm = ""): - #calculating margins - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - xLeftOffset = int(xLeftOffset*fontZoom) - xRightOffset = int(xRightOffset*fontZoom) - yBottomOffset = int(yBottomOffset*fontZoom) - - cWidth = canvas.size[0] - cHeight = canvas.size[1] - plotWidth = cWidth - xLeftOffset - xRightOffset - plotHeight = cHeight - yTopOffset - yBottomOffset - startPixelX = xLeftOffset - endPixelX = (xLeftOffset + plotWidth) - - #Drawing Area Height - drawAreaHeight = plotHeight - if self.plotScale == 'physic' and self.selectedChr > -1: # for single chr - drawAreaHeight -= self.ENSEMBL_BAND_HEIGHT + self.UCSC_BAND_HEIGHT+ self.WEBQTL_BAND_HEIGHT + 3*self.BAND_SPACING+ 10*zoom - if self.geneChecked: - drawAreaHeight -= self.NUM_GENE_ROWS*self.EACH_GENE_HEIGHT + 3*self.BAND_SPACING + 10*zoom - else:# for all chrs - if self.selectedChr > -1: - drawAreaHeight -= 20 - else: - drawAreaHeight -= 30 - - #Image map - gifmap = HT.Map(name='WebQTLImageMap') - - newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) - # Draw the alternating-color background first and get plotXScale - plotXScale = self.drawGraphBackground(canvas, gifmap, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) - - # Draw X axis - self.drawXAxis(fd, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) - # Draw QTL curve - self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) - - #draw legend - if self.multipleInterval: - self.drawMultiTraitName(fd, canvas, gifmap, showLocusForm, offset=newoffset) - elif self.legendChecked: - self.drawLegendPanel(fd, canvas, offset=newoffset) - else: - pass - - #draw position, no need to use a separate function - if fd.genotype.Mbmap: - self.drawProbeSetPosition(canvas, plotXScale, offset=newoffset) - - return gifmap - - - # functions for manhattam plot of markers - def drawManhattanPlotForPLINK(self, canvas, drawAreaHeight, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None,plinkResultDict={},thresholdPvalue=-1): - - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - # INTERCROSS = (self.genotype.type=="intercross") - INTERCROSS ='' #?????? - - ChrLengthDistList = self.ChrLengthMbList - drawRegionDistance = self.ChrLengthMbSum - GraphInterval=self.GraphInterval - pvalueHeightThresh = drawAreaHeight - 80 #ZS: Otherwise the plot gets very close to the chromosome labels - - #draw the pvalue scale - #We first determine whether or not we are using a sliding scale. - #If so, we need to compute the maximum pvalue value to determine where the max y-value should be, and call this pvalueMax. - #pvalueTop is then defined to be above the pvalueMax by enough to add one additional pvalueScale increment. - #if we are using a set-scale, then we set pvalueTop to be the user's value, and pvalueMax doesn't matter. - - # for human data we use p value instead of lrs - pValueList=[] - for key in plinkResultDict: - valueList = plinkResultDict[key] - for item in valueList: - pValue = item[-1] - pValueList.append(pValue) - - formattedPValueList=[] - for pValue in pValueList: - try: - pValue=float(pValue) - except: - pValue =1 - formattedpValue = -math.log10(pValue) - formattedPValueList.append(formattedpValue) - - #sliding scale - pvalueMax = max(formattedPValueList) - #pvalueMax =pvalueMax +1 - # no permutation result for plink func: GenReport() - pvalueMin = int(-math.log10(thresholdPvalue)) - - if pvalueMax> 100: - pvalueScale = 20.0 - elif pvalueMax > 20: - pvalueScale = 5.0 - elif pvalueMax > 7.5: - pvalueScale = 2.5 - else: - pvalueScale = 1.0 - - # the base line for x-axis is -log(thresholdPvalue) - pvalueAxisList = Plot.frange(pvalueMin, pvalueMax, pvalueScale) - #make sure the user's value appears on the y-axis - #ZS: There is no way to do this without making the position of the points not directly proportional to a given distance on the y-axis - #tempPvalueMax=round(pvalueMax) - tempPvalueMax = pvalueAxisList[len(pvalueAxisList)-1] + pvalueScale - pvalueAxisList.append(tempPvalueMax) - - #ZS: I don't understand this; the if statement will be true for any number that isn't exactly X.5. - #if abs(tempPvalueMax-pvalueMax) <0.5: - # tempPvalueMax=tempPvalueMax+1 - # pvalueAxisList.append(tempPvalueMax) - - #draw the "pvalue" string to the left of the axis - pvalueScaleFont=pid.Font(ttf="verdana", size=14*fontZoom, bold=0) - pvalueLODFont=pid.Font(ttf="verdana", size=14*zoom*1.5, bold=0) - yZero = yTopOffset + plotHeight - - #yAxis label display area - yAxis_label ='-log(P)' - canvas.drawString(yAxis_label, xLeftOffset - canvas.stringWidth("999.99", font=pvalueScaleFont) - 10*zoom, \ - yZero - 150, font=pvalueLODFont, color=pid.black, angle=90) - - for i,item in enumerate(pvalueAxisList): - ypvalue = yZero - (float(i)/float(len(pvalueAxisList) - 1)) * pvalueHeightThresh - canvas.drawLine(xLeftOffset, ypvalue, xLeftOffset - 4, ypvalue, color=self.LRS_COLOR, width=1*zoom) - scaleStr = "%2.1f" % item - #added by NL 6-24-2011:Y-axis scale display - canvas.drawString(scaleStr, xLeftOffset-4-canvas.stringWidth(scaleStr, font=pvalueScaleFont)-5, ypvalue+3, font=pvalueScaleFont, color=self.LRS_COLOR) - - ChrList=self.ChrList - startPosX = xLeftOffset - - for i, chr in enumerate(ChrList): - - if plinkResultDict.has_key(chr): - plinkresultList = plinkResultDict[chr] - - m = 0 - #add by NL 06-24-2011: for mahanttam plot - symbolFont = pid.Font(ttf="fnt_bs", size=5,bold=0) - # color for point in each chr - chrCount=len(ChrList) - chrColorDict =self.getColorForMarker(chrCount=chrCount,flag=1) - for j, item in enumerate(plinkresultList): - try : - mb=float(item[1])/1000000.0 - except: - mb=0 - - try : - pvalue =float(item[-1]) - except: - pvalue =1 - - try: - snpName = item[0] - except: - snpName='' - - formattedPvalue = -math.log10(pvalue) - - Xc = startPosX + (mb-startMb)*plotXScale - Yc = yZero - (formattedPvalue-pvalueMin)*pvalueHeightThresh/(tempPvalueMax - pvalueMin) - canvas.drawString("5", Xc-canvas.stringWidth("5",font=symbolFont)/2+1,Yc+2,color=chrColorDict[i], font=symbolFont) - m += 1 - - startPosX += (ChrLengthDistList[i]+GraphInterval)*plotXScale - - canvas.drawLine(xLeftOffset, yZero, xLeftOffset, yTopOffset, color=self.LRS_COLOR, width=1*zoom) #the blue line running up the y axis - - def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): - - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - INTERCROSS = (self.genotype.type=="intercross") - - ChrLengthDistList = self.ChrLengthMbList - GraphInterval=self.GraphInterval - LRSHeightThresh = drawAreaHeight - AdditiveHeightThresh = drawAreaHeight/2 - DominanceHeightThresh = drawAreaHeight/2 - - #draw the LRS scale - #We first determine whether or not we are using a sliding scale. - #If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRSMax. - #LRSTop is then defined to be above the LRSMax by enough to add one additional LRSScale increment. - #if we are using a set-scale, then we set LRSTop to be the user's value, and LRSMax doesn't matter. - - if self.LRS_LOD == 'LOD': - lodm = self.LODFACTOR - else: - lodm = 1.0 - - if self.lrsMax <= 0: #sliding scale - LRSMax = max(map(max, self.qtlresults)).lrs - #genotype trait will give infinite LRS - LRSMax = min(LRSMax, webqtlConfig.MAXLRS) - LRSMax = max(self.significance, LRSMax) - else: - LRSMax = self.lrsMax*lodm - - if LRSMax/lodm > 100: - LRSScale = 20.0 - elif LRSMax/lodm > 20: - LRSScale = 5.0 - elif LRSMax/lodm > 7.5: - LRSScale = 2.5 - else: - LRSScale = 1.0 - - LRSAxisList = Plot.frange(LRSScale, LRSMax/lodm, LRSScale) - #make sure the user's value appears on the y-axis - #update by NL 6-21-2011: round the LOD value to 100 when LRSMax is equal to 460 - LRSAxisList.append(round(LRSMax/lodm)) - - #draw the "LRS" or "LOD" string to the left of the axis - LRSScaleFont=pid.Font(ttf="verdana", size=14*fontZoom, bold=0) - LRSLODFont=pid.Font(ttf="verdana", size=14*zoom*1.5, bold=0) - yZero = yTopOffset + plotHeight - - #yAxis label display area - canvas.drawString(self.LRS_LOD, xLeftOffset - canvas.stringWidth("999.99", font=LRSScaleFont) - 10*zoom, \ - yZero - 150, font=LRSLODFont, color=pid.black, angle=90) - - for item in LRSAxisList: - yLRS = yZero - (item*lodm/LRSMax) * LRSHeightThresh - canvas.drawLine(xLeftOffset, yLRS, xLeftOffset - 4, yLRS, color=self.LRS_COLOR, width=1*zoom) - scaleStr = "%2.1f" % item - #added by NL 6-24-2011:Y-axis scale display - canvas.drawString(scaleStr, xLeftOffset-4-canvas.stringWidth(scaleStr, font=LRSScaleFont)-5, yLRS+3, font=LRSScaleFont, color=self.LRS_COLOR) - - - #"Significant" and "Suggestive" Drawing Routine - # ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...) - if self.permChecked and not self.multipleInterval: - significantY = yZero - self.significance*LRSHeightThresh/LRSMax - suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSMax - - - startPosX = xLeftOffset - for i, _chr in enumerate(self.genotype): - rightEdge = int(startPosX + self.ChrLengthDistList[i]*plotXScale - self.SUGGESTIVE_WIDTH/1.5) - #added by NL 6-24-2011:draw suggestive line (grey one) - canvas.drawLine(startPosX+self.SUGGESTIVE_WIDTH/1.5, suggestiveY, rightEdge, suggestiveY, color=self.SUGGESTIVE_COLOR, - width=self.SUGGESTIVE_WIDTH*zoom, clipX=(xLeftOffset, xLeftOffset + plotWidth-2)) - #added by NL 6-24-2011:draw significant line (pink one) - canvas.drawLine(startPosX+self.SUGGESTIVE_WIDTH/1.5, significantY, rightEdge, significantY, color=self.SIGNIFICANT_COLOR, - width=self.SIGNIFICANT_WIDTH*zoom, clipX=(xLeftOffset, xLeftOffset + plotWidth-2)) - sugg_coords = "%d, %d, %d, %d" % (startPosX, suggestiveY-2, rightEdge + 2*zoom, suggestiveY+2) - sig_coords = "%d, %d, %d, %d" % (startPosX, significantY-2, rightEdge + 2*zoom, significantY+2) - if self.LRS_LOD == 'LRS': - sugg_title = "Suggestive LRS = %0.2f" % self.suggestive - sig_title = "Significant LRS = %0.2f" % self.significance - else: - sugg_title = "Suggestive LOD = %0.2f" % (self.suggestive/4.61) - sig_title = "Significant LOD = %0.2f" % (self.significance/4.61) - Areas1 = HT.Area(shape='rect',coords=sugg_coords,title=sugg_title) - Areas2 = HT.Area(shape='rect',coords=sig_coords,title=sig_title) - gifmap.areas.append(Areas1) - gifmap.areas.append(Areas2) - - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale - - - if self.multipleInterval: - lrsEdgeWidth = 1 - else: - additiveMax = max(map(lambda X : abs(X.additive), self.qtlresults[0])) - if INTERCROSS: - dominanceMax = max(map(lambda X : abs(X.dominance), self.qtlresults[0])) - else: - dominanceMax = -1 - lrsEdgeWidth = 2 - for i, qtlresult in enumerate(self.qtlresults): - m = 0 - startPosX = xLeftOffset - thisLRSColor = self.colorCollection[i] - - #add by NL 06-24-2011: for mahanttam plot - symbolFont = pid.Font(ttf="fnt_bs", size=5,bold=0) - - for j, _chr in enumerate(self.genotype): - chrCount=len(self.genotype) - chrColorDict =self.getColorForMarker(chrCount=chrCount,flag=1) - LRSCoordXY = [] - AdditiveCoordXY = [] - DominanceCoordXY = [] - for k, _locus in enumerate(_chr): - if self.plotScale == 'physic': - Xc = startPosX + (_locus.Mb-startMb)*plotXScale - else: - Xc = startPosX + (_locus.cM-_chr[0].cM)*plotXScale - # updated by NL 06-18-2011: - # fix the over limit LRS graph issue since genotype trait may give infinite LRS; - # for any lrs is over than 460(LRS max in this system), it will be reset to 460 - if qtlresult[m].lrs> 460 or qtlresult[m].lrs=='inf': - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSMax - else: - Yc = yZero - qtlresult[m].lrs*LRSHeightThresh/LRSMax - - LRSCoordXY.append((Xc, Yc)) - #add by NL 06-24-2011: for mahanttam plot - #self.significance/4.61 consider chr and LOD - # significantY = yZero - self.significance*LRSHeightThresh/LRSMax - # if Yc >significantY: - # canvas.drawString(":", Xc-canvas.stringWidth(":",font=symbolFont)/2+1,Yc+2,color=pid.black, font=symbolFont) - # else: - # canvas.drawString(":", Xc-canvas.stringWidth(":",font=symbolFont)/2+1,Yc+2,color=pid.black, font=symbolFont) - - # add by NL 06-27-2011: eliminate imputed value when locus name is equal to '-' - if (qtlresult[m].locus.name) and (qtlresult[m].locus.name!=' - '): - canvas.drawString("5", Xc-canvas.stringWidth("5",font=symbolFont)/2+1,Yc+2,color=chrColorDict[j], font=symbolFont) - - if not self.multipleInterval and self.additiveChecked: - Yc = yZero - qtlresult[m].additive*AdditiveHeightThresh/additiveMax - AdditiveCoordXY.append((Xc, Yc)) - if not self.multipleInterval and INTERCROSS and self.additiveChecked: - Yc = yZero - qtlresult[m].dominance*DominanceHeightThresh/dominanceMax - DominanceCoordXY.append((Xc, Yc)) - m += 1 - - startPosX += (ChrLengthDistList[j]+GraphInterval)*plotXScale - - - ###draw additive scale - if not self.multipleInterval and self.additiveChecked: - additiveScaleFont=pid.Font(ttf="verdana",size=12*fontZoom,bold=0) - additiveScale = Plot.detScaleOld(0,additiveMax) - additiveStep = (additiveScale[1]-additiveScale[0])/additiveScale[2] - additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) - maxAdd = additiveScale[1] - addPlotScale = AdditiveHeightThresh/additiveMax - - additiveAxisList.append(additiveScale[1]) - for item in additiveAxisList: - additiveY = yZero - item*addPlotScale - canvas.drawLine(xLeftOffset + plotWidth,additiveY,xLeftOffset+4+ plotWidth,additiveY,color=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) - scaleStr = "%2.3f" % item - canvas.drawString(scaleStr,xLeftOffset + plotWidth +6,additiveY+5,font=additiveScaleFont,color=self.ADDITIVE_COLOR_POSITIVE) - - canvas.drawLine(xLeftOffset+plotWidth,additiveY,xLeftOffset+plotWidth,yZero,color=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) - - canvas.drawLine(xLeftOffset, yZero, xLeftOffset, yTopOffset, color=self.LRS_COLOR, width=1*zoom) #the blue line running up the y axis - - def drawGraphBackgroundForPLINK(self, canvas, gifmap, offset= (80, 120, 80, 50), zoom = 1, startMb = None, endMb = None,plinkResultDict={} ): - - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - #calculate plot scale - #XZ: all of these global variables should be passed from function signiture - ChrLengthDistList = self.ChrLengthMbList - drawRegionDistance = self.ChrLengthMbSum - GraphInterval=self.GraphInterval - ChrList =self.ChrList - - #multiple chromosome view - plotXScale = plotWidth / ((len(ChrList)-1)*GraphInterval + drawRegionDistance) - - startPosX = xLeftOffset - chrLabelFont=pid.Font(ttf="verdana",size=24*fontZoom,bold=0) - - for i, _chr in enumerate(ChrList): - - if (i % 2 == 0): - theBackColor = self.GRAPH_BACK_DARK_COLOR - else: - theBackColor = self.GRAPH_BACK_LIGHT_COLOR - # NL:resize chr width for drawing - if float(ChrLengthDistList[i])<90: - ChrLengthDistList[i]=90 - #draw the shaded boxes and the sig/sug thick lines - canvas.drawRect(startPosX, yTopOffset, startPosX + ChrLengthDistList[i]*plotXScale, \ - yTopOffset+plotHeight, edgeColor=pid.gainsboro,fillColor=theBackColor) - - chrNameWidth = canvas.stringWidth(_chr, font=chrLabelFont) - chrStartPix = startPosX + (ChrLengthDistList[i]*plotXScale -chrNameWidth)/2 - chrEndPix = startPosX + (ChrLengthDistList[i]*plotXScale +chrNameWidth)/2 - - canvas.drawString(_chr, chrStartPix, yTopOffset +20,font = chrLabelFont,color=pid.dimgray) - COORDS = "%d,%d,%d,%d" %(chrStartPix, yTopOffset, chrEndPix,yTopOffset +20) - - #add by NL 09-03-2010 - HREF = "javascript:changeView(%d,%s);" % (i,ChrLengthDistList) - Areas = HT.Area(shape='rect',coords=COORDS,href=HREF) - gifmap.areas.append(Areas) - startPosX += (ChrLengthDistList[i]+GraphInterval)*plotXScale - - return plotXScale - - - def drawGraphBackground(self, canvas, gifmap, offset= (80, 120, 80, 50), zoom = 1, startMb = None, endMb = None): - ##conditions - ##multiple Chromosome view - ##single Chromosome Physical - ##single Chromosome Genetic - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - #calculate plot scale - if self.plotScale != 'physic': - self.ChrLengthDistList = self.ChrLengthCMList - drawRegionDistance = self.ChrLengthCMSum - else: - self.ChrLengthDistList = self.ChrLengthMbList - drawRegionDistance = self.ChrLengthMbSum - - if self.selectedChr > -1: #single chromosome view - spacingAmt = plotWidth/13.5 - i = 0 - for startPix in Plot.frange(xLeftOffset, xLeftOffset+plotWidth, spacingAmt): - if (i % 2 == 0): - theBackColor = self.GRAPH_BACK_DARK_COLOR - else: - theBackColor = self.GRAPH_BACK_LIGHT_COLOR - i += 1 - canvas.drawRect(startPix, yTopOffset, min(startPix+spacingAmt, xLeftOffset+plotWidth), \ - yTopOffset+plotHeight, edgeColor=theBackColor, fillColor=theBackColor) - - drawRegionDistance = self.ChrLengthDistList[self.selectedChr] - self.ChrLengthDistList = [drawRegionDistance] - if self.plotScale == 'physic': - plotXScale = plotWidth / (endMb-startMb) - else: - plotXScale = plotWidth / drawRegionDistance - - else: #multiple chromosome view - plotXScale = plotWidth / ((len(self.genotype)-1)*self.GraphInterval + drawRegionDistance) - - startPosX = xLeftOffset - chrLabelFont=pid.Font(ttf="verdana",size=24*fontZoom,bold=0) - - for i, _chr in enumerate(self.genotype): - - if (i % 2 == 0): - theBackColor = self.GRAPH_BACK_DARK_COLOR - else: - theBackColor = self.GRAPH_BACK_LIGHT_COLOR - - #draw the shaded boxes and the sig/sug thick lines - canvas.drawRect(startPosX, yTopOffset, startPosX + self.ChrLengthDistList[i]*plotXScale, \ - yTopOffset+plotHeight, edgeColor=pid.gainsboro,fillColor=theBackColor) - - chrNameWidth = canvas.stringWidth(_chr.name, font=chrLabelFont) - chrStartPix = startPosX + (self.ChrLengthDistList[i]*plotXScale -chrNameWidth)/2 - chrEndPix = startPosX + (self.ChrLengthDistList[i]*plotXScale +chrNameWidth)/2 - - canvas.drawString(_chr.name, chrStartPix, yTopOffset +20,font = chrLabelFont,color=pid.dimgray) - COORDS = "%d,%d,%d,%d" %(chrStartPix, yTopOffset, chrEndPix,yTopOffset +20) - - #add by NL 09-03-2010 - HREF = "javascript:changeView(%d,%s);" % (i,self.ChrLengthMbList) - Areas = HT.Area(shape='rect',coords=COORDS,href=HREF) - gifmap.areas.append(Areas) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale - - return plotXScale - - # XZ: The only difference of function drawXAxisForPLINK and function drawXAxis are the function name and the self.plotScale condition. - def drawXAxisForPLINK(self, fd, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - yZero = canvas.size[1] - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - #Parameters - ChrLengthDistList = self.ChrLengthMbList - GraphInterval=self.GraphInterval - - NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks - X_MAJOR_TICK_THICKNESS = 2 - X_MINOR_TICK_THICKNESS = 1 - X_AXIS_THICKNESS = 1*zoom - - # ======= Alex: Draw the X-axis labels (megabase location) - MBLabelFont = pid.Font(ttf="verdana", size=12*fontZoom, bold=0) - xMajorTickHeight = 15 # How high the tick extends below the axis - xMinorTickHeight = 5*zoom - xAxisTickMarkColor = pid.black - xAxisLabelColor = pid.black - fontHeight = 12*fontZoom # How tall the font that we're using is - spacingFromLabelToAxis = 20 - spacingFromLineToLabel = 3 - - if self.plotScale == 'physic': - strYLoc = yZero + spacingFromLabelToAxis + canvas.fontHeight(MBLabelFont) - ###Physical single chromosome view - if self.selectedChr > -1: - graphMbWidth = endMb - startMb - XScale = Plot.detScale(startMb, endMb) - XStart, XEnd, XStep = XScale - if XStep < 8: - XStep *= 2 - spacingAmtX = spacingAmt = (XEnd-XStart)/XStep - - j = 0 - while abs(spacingAmtX -int(spacingAmtX)) >= spacingAmtX/100.0 and j < 6: - j += 1 - spacingAmtX *= 10 - - formatStr = '%%2.%df' % j - - for counter, _Mb in enumerate(Plot.frange(XStart, XEnd, spacingAmt / NUM_MINOR_TICKS)): - if _Mb < startMb or _Mb > endMb: - continue - Xc = xLeftOffset + plotXScale*(_Mb - startMb) - if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark - canvas.drawLine(Xc, yZero, Xc, yZero+xMajorTickHeight, color=xAxisTickMarkColor, width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark - labelStr = str(formatStr % _Mb) # What Mbase location to put on the label - strWidth = canvas.stringWidth(labelStr, font=MBLabelFont) - drawStringXc = (Xc - (strWidth / 2.0)) - canvas.drawString(labelStr, drawStringXc, strYLoc, font=MBLabelFont, color=xAxisLabelColor, angle=0) - else: - canvas.drawLine(Xc, yZero, Xc, yZero+xMinorTickHeight, color=xAxisTickMarkColor, width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark - # end else - - ###Physical genome wide view - else: - distScale = 0 - startPosX = xLeftOffset - for i, distLen in enumerate(ChrLengthDistList): - if distScale == 0: #universal scale in whole genome mapping - if distLen > 75: - distScale = 25 - elif distLen > 30: - distScale = 10 - else: - distScale = 5 - for tickdists in range(distScale, ceil(distLen), distScale): - canvas.drawLine(startPosX + tickdists*plotXScale, yZero, startPosX + tickdists*plotXScale, yZero + 7, color=pid.black, width=1*zoom) - canvas.drawString(str(tickdists), startPosX+tickdists*plotXScale, yZero + 10*zoom, color=pid.black, font=MBLabelFont, angle=270) - startPosX += (ChrLengthDistList[i]+GraphInterval)*plotXScale - - megabaseLabelFont = pid.Font(ttf="verdana", size=14*zoom*1.5, bold=0) - canvas.drawString("Megabases", xLeftOffset + (plotWidth -canvas.stringWidth("Megabases", font=megabaseLabelFont))/2, - strYLoc + canvas.fontHeight(MBLabelFont) + 5*zoom, font=megabaseLabelFont, color=pid.black) - pass - - canvas.drawLine(xLeftOffset, yZero, xLeftOffset+plotWidth, yZero, color=pid.black, width=X_AXIS_THICKNESS) # Draw the X axis itself - - def drawXAxis(self, fd, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - yZero = canvas.size[1] - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - #Parameters - NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks - X_MAJOR_TICK_THICKNESS = 2 - X_MINOR_TICK_THICKNESS = 1 - X_AXIS_THICKNESS = 1*zoom - - # ======= Alex: Draw the X-axis labels (megabase location) - MBLabelFont = pid.Font(ttf="verdana", size=12*fontZoom, bold=0) - xMajorTickHeight = 15 # How high the tick extends below the axis - xMinorTickHeight = 5*zoom - xAxisTickMarkColor = pid.black - xAxisLabelColor = pid.black - fontHeight = 12*fontZoom # How tall the font that we're using is - spacingFromLabelToAxis = 20 - spacingFromLineToLabel = 3 - - if self.plotScale == 'physic': - strYLoc = yZero + spacingFromLabelToAxis + canvas.fontHeight(MBLabelFont) - ###Physical single chromosome view - if self.selectedChr > -1: - graphMbWidth = endMb - startMb - XScale = Plot.detScale(startMb, endMb) - XStart, XEnd, XStep = XScale - if XStep < 8: - XStep *= 2 - spacingAmtX = spacingAmt = (XEnd-XStart)/XStep - - j = 0 - while abs(spacingAmtX -int(spacingAmtX)) >= spacingAmtX/100.0 and j < 6: - j += 1 - spacingAmtX *= 10 - - formatStr = '%%2.%df' % j - - for counter, _Mb in enumerate(Plot.frange(XStart, XEnd, spacingAmt / NUM_MINOR_TICKS)): - if _Mb < startMb or _Mb > endMb: - continue - Xc = xLeftOffset + plotXScale*(_Mb - startMb) - if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark - canvas.drawLine(Xc, yZero, Xc, yZero+xMajorTickHeight, color=xAxisTickMarkColor, width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark - labelStr = str(formatStr % _Mb) # What Mbase location to put on the label - strWidth = canvas.stringWidth(labelStr, font=MBLabelFont) - drawStringXc = (Xc - (strWidth / 2.0)) - canvas.drawString(labelStr, drawStringXc, strYLoc, font=MBLabelFont, color=xAxisLabelColor, angle=0) - else: - canvas.drawLine(Xc, yZero, Xc, yZero+xMinorTickHeight, color=xAxisTickMarkColor, width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark - # end else - - ###Physical genome wide view - else: - distScale = 0 - startPosX = xLeftOffset - for i, distLen in enumerate(self.ChrLengthDistList): - if distScale == 0: #universal scale in whole genome mapping - if distLen > 75: - distScale = 25 - elif distLen > 30: - distScale = 10 - else: - distScale = 5 - for tickdists in range(distScale, ceil(distLen), distScale): - canvas.drawLine(startPosX + tickdists*plotXScale, yZero, startPosX + tickdists*plotXScale, yZero + 7, color=pid.black, width=1*zoom) - canvas.drawString(str(tickdists), startPosX+tickdists*plotXScale, yZero + 10*zoom, color=pid.black, font=MBLabelFont, angle=270) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale - - megabaseLabelFont = pid.Font(ttf="verdana", size=14*zoom*1.5, bold=0) - canvas.drawString("Megabases", xLeftOffset + (plotWidth -canvas.stringWidth("Megabases", font=megabaseLabelFont))/2, - strYLoc + canvas.fontHeight(MBLabelFont) + 5*zoom, font=megabaseLabelFont, color=pid.black) - pass - else: - ChrAInfo = [] - preLpos = -1 - distinctCount = 0.0 - if len(self.genotype) > 1: - for i, _chr in enumerate(self.genotype): - thisChr = [] - Locus0CM = _chr[0].cM - nLoci = len(_chr) - if nLoci <= 8: - for _locus in _chr: - if _locus.name != ' - ': - if _locus.cM != preLpos: - distinctCount += 1 - preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) - else: - for j in (0, nLoci/4, nLoci/2, nLoci*3/4, -1): - while _chr[j].name == ' - ': - j += 1 - if _chr[j].cM != preLpos: - distinctCount += 1 - preLpos = _chr[j].cM - thisChr.append([_chr[j].name, _chr[j].cM-Locus0CM]) - ChrAInfo.append(thisChr) - else: - for i, _chr in enumerate(self.genotype): - thisChr = [] - Locus0CM = _chr[0].cM - for _locus in _chr: - if _locus.name != ' - ': - if _locus.cM != preLpos: - distinctCount += 1 - preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) - ChrAInfo.append(thisChr) - - stepA = (plotWidth+0.0)/distinctCount - - LRectWidth = 10 - LRectHeight = 3 - offsetA = -stepA - lineColor = pid.lightblue - startPosX = xLeftOffset - for j, ChrInfo in enumerate(ChrAInfo): - preLpos = -1 - for i, item in enumerate(ChrInfo): - Lname,Lpos = item - if Lpos != preLpos: - offsetA += stepA - differ = 1 - else: - differ = 0 - preLpos = Lpos - Lpos *= plotXScale - if self.selectedChr > -1: - Zorder = i % 5 - else: - Zorder = 0 - if differ: - canvas.drawLine(startPosX+Lpos,yZero,xLeftOffset+offsetA,\ - yZero+25, color=lineColor) - canvas.drawLine(xLeftOffset+offsetA,yZero+25,xLeftOffset+offsetA,\ - yZero+40+Zorder*(LRectWidth+3),color=lineColor) - rectColor = pid.orange - else: - canvas.drawLine(xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)-3,\ - xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3),color=lineColor) - rectColor = pid.deeppink - canvas.drawRect(xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3),\ - xLeftOffset+offsetA-LRectHeight,yZero+40+Zorder*(LRectWidth+3)+LRectWidth,\ - edgeColor=rectColor,fillColor=rectColor,edgeWidth = 0) - COORDS="%d,%d,%d,%d"%(xLeftOffset+offsetA-LRectHeight, yZero+40+Zorder*(LRectWidth+3),\ - xLeftOffset+offsetA,yZero+40+Zorder*(LRectWidth+3)+LRectWidth) - HREF="javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm,fd.RISet+"Geno", Lname) - Areas=HT.Area(shape='rect',coords=COORDS,href=HREF, title="Locus : " + Lname) - gifmap.areas.append(Areas) - ##piddle bug - if j == 0: - canvas.drawLine(startPosX,yZero,startPosX,yZero+40, color=lineColor) - startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale - - canvas.drawLine(xLeftOffset, yZero, xLeftOffset+plotWidth, yZero, color=pid.black, width=X_AXIS_THICKNESS) # Draw the X axis itself - - def getColorForMarker(self, chrCount,flag):# no change is needed - chrColorDict={} - for i in range(chrCount): - if flag==1: # display blue and lightblue intercross - chrColorDict[i]=pid.black - elif flag==0: - if (i%2==0): - chrColorDict[i]=pid.blue - else: - chrColorDict[i]=pid.lightblue - else:#display different color for different chr - if i in [0,8,16]: - chrColorDict[i]=pid.black - elif i in [1,9,17]: - chrColorDict[i]=pid.red - elif i in [2,10,18]: - chrColorDict[i]=pid.lightgreen - elif i in [3,11,19]: - chrColorDict[i]=pid.blue - elif i in [4,12]: - chrColorDict[i]=pid.lightblue - elif i in [5,13]: - chrColorDict[i]=pid.hotpink - elif i in [6,14]: - chrColorDict[i]=pid.gold - elif i in [7,15]: - chrColorDict[i]=pid.grey - - return chrColorDict - - - def drawProbeSetPosition(self, canvas, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): - if len(self.traitList) != 1: - return - - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - yZero = canvas.size[1] - yBottomOffset - fontZoom = zoom - if zoom == 2: - fontZoom = 1.5 - - try: - Chr = self.traitList[0].chr # self.traitListChr =self.traitList[0].chr=_vals need to change to chrList and mbList - Mb = self.traitList[0].mb # self.traitListMb =self.traitList[0].mb=_vals - except: - return - - if self.plotScale == 'physic': - if self.selectedChr > -1: - if self.genotype[0].name != Chr or Mb < self.startMb or Mb > self.endMb: - return - else: - locPixel = xLeftOffset + (Mb-self.startMb)*plotXScale - else: - locPixel = xLeftOffset - for i, _chr in enumerate(self.genotype): - if _chr.name != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale - else: - locPixel += Mb*plotXScale - break - else: - if self.selectedChr > -1: - if self.genotype[0].name != Chr: - return - else: - for i, _locus in enumerate(self.genotype[0]): - #the trait's position is on the left of the first genotype - if i==0 and _locus.Mb >= Mb: - locPixel=-1 - break - - #the trait's position is between two traits - if i > 0 and self.genotype[0][i-1].Mb < Mb and _locus.Mb >= Mb: - locPixel = xLeftOffset + plotXScale*(self.genotype[0][i-1].cM+(_locus.cM-self.genotype[0][i-1].cM)*(Mb -self.genotype[0][i-1].Mb)/(_locus.Mb-self.genotype[0][i-1].Mb)) - break - - #the trait's position is on the right of the last genotype - if i==len(self.genotype[0]) and Mb>=_locus.Mb: - locPixel = -1 - else: - locPixel = xLeftOffset - for i, _chr in enumerate(self.genotype): - if _chr.name != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale - else: - locPixel += (Mb*(_chr[-1].cM-_chr[0].cM)/self.ChrLengthCMList[i])*plotXScale - break - if locPixel >= 0: - traitPixel = ((locPixel, yZero), (locPixel-6, yZero+12), (locPixel+6, yZero+12)) - canvas.drawPolygon(traitPixel, edgeColor=pid.black, fillColor=self.TRANSCRIPT_LOCATION_COLOR, closed=1) - - if self.legendChecked: - startPosY = 15 - nCol = 2 - smallLabelFont = pid.Font(ttf="trebuc", size=12, bold=1) - leftOffset = xLeftOffset+(nCol-1)*200 - canvas.drawPolygon(((leftOffset+6, startPosY-6), (leftOffset, startPosY+6), (leftOffset+12, startPosY+6)), edgeColor=pid.black, fillColor=self.TRANSCRIPT_LOCATION_COLOR, closed=1) - canvas.drawString("Sequence Site", (leftOffset+15), (startPosY+5), smallLabelFont, self.TOP_RIGHT_INFO_COLOR) - - # build dict based on plink result, key is chr, value is list of [snp,BP,pValue] - def getPlinkResultDict(self,outputFileName='',thresholdPvalue=-1,ChrOrderIdNameDict={}): - - ChrList =self.ChrList - plinkResultDict={} - - plinkResultfp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, outputFileName), "rb") - - headerLine=plinkResultfp.readline()# read header line - line = plinkResultfp.readline() - - valueList=[] # initialize value list, this list will include snp, bp and pvalue info - pValueList=[] - count=0 - - while line: - #convert line from str to list - lineList=self.buildLineList(line=line) - - # only keep the records whose chromosome name is in db - if ChrOrderIdNameDict.has_key(int(lineList[0])) and lineList[-1] and lineList[-1].strip()!='NA': - - chrName=ChrOrderIdNameDict[int(lineList[0])] - snp = lineList[1] - BP = lineList[2] - pValue = float(lineList[-1]) - pValueList.append(pValue) - - if plinkResultDict.has_key(chrName): - valueList=plinkResultDict[chrName] - - # pvalue range is [0,1] - if thresholdPvalue >=0 and thresholdPvalue<=1: - if pValue < thresholdPvalue: - valueList.append((snp,BP,pValue)) - count+=1 - - plinkResultDict[chrName]=valueList - valueList=[] - else: - if thresholdPvalue>=0 and thresholdPvalue<=1: - if pValue < thresholdPvalue: - valueList.append((snp,BP,pValue)) - count+=1 - - if valueList: - plinkResultDict[chrName]=valueList - - valueList=[] - - - line =plinkResultfp.readline() - else: - line=plinkResultfp.readline() - - if pValueList: - minPvalue= min(pValueList) - else: - minPvalue=0 - - return count,minPvalue,plinkResultDict - - - ###################################################### - # input: line: str,one line read from file - # function: convert line from str to list; - # output: lineList list - ####################################################### - def buildLineList(self,line=None): - - lineList = string.split(string.strip(line),' ')# irregular number of whitespaces between columns - lineList =[ item for item in lineList if item <>''] - lineList = map(string.strip, lineList) - - return lineList - - #added by NL: automatically generate pheno txt file for PLINK based on strainList passed from dataEditing page - def genPhenoTxtFileForPlink(self,phenoFileName='', RISetName='', probesetName='', valueDict={}): - pedFileStrainList=self.getStrainNameFromPedFile(RISetName=RISetName) - outputFile = open("%s%s.txt"%(webqtlConfig.TMPDIR,phenoFileName),"wb") - headerLine = 'FID\tIID\t%s\n'%probesetName - outputFile.write(headerLine) - - newValueList=[] - - #if valueDict does not include some strain, value will be set to -9999 as missing value - for item in pedFileStrainList: - try: - value=valueDict[item] - value=str(value).replace('value=','') - value=value.strip() - except: - value=-9999 - - newValueList.append(value) - - - newLine='' - for i, strain in enumerate(pedFileStrainList): - j=i+1 - value=newValueList[i] - newLine+='%s\t%s\t%s\n'%(strain, strain, value) - - if j%1000==0: - outputFile.write(newLine) - newLine='' - - if newLine: - outputFile.write(newLine) - - outputFile.close() - - # get strain name from ped file in order - def getStrainNameFromPedFile(self, RISetName=''): - pedFileopen= open("%splink/%s.ped"%(webqtlConfig.GENODIR, RISetName),"r") - line =pedFileopen.readline() - strainNameList=[] - - while line: - lineList=string.split(string.strip(line),'\t') - lineList=map(string.strip,lineList) - - strainName=lineList[0] - strainNameList.append(strainName) - - line =pedFileopen.readline() - - return strainNameList - - ################################################################ - # Generate Chr list, Chr OrderId and Retrieve Length Information - ################################################################ - def getChrNameOrderIdLength(self,RISet=''): - - try: - query = """ - Select - Chr_Length.Name,Chr_Length.OrderId,Length from Chr_Length, InbredSet - where - Chr_Length.SpeciesId = InbredSet.SpeciesId AND - InbredSet.Name = '%s' - Order by OrderId - """ % (RISet) - self.cursor.execute(query) - - results =self.cursor.fetchall() - ChrList=[] - ChrLengthMbList=[] - ChrNameOrderIdDict={} - ChrOrderIdNameDict={} - - for item in results: - ChrList.append(item[0]) - ChrNameOrderIdDict[item[0]]=item[1] # key is chr name, value is orderId - ChrOrderIdNameDict[item[1]]=item[0] # key is orderId, value is chr name - ChrLengthMbList.append(item[2]) - - except: - ChrList=[] - ChrNameOrderIdDict={} - ChrLengthMbList=[] - - return ChrList,ChrNameOrderIdDict,ChrOrderIdNameDict,ChrLengthMbList diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 3ec61e55..bfb63995 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -16,7 +16,6 @@ import uuid import rpy2.robjects as ro import numpy as np -from scipy import linalg import cPickle as pickle import itertools @@ -84,7 +83,6 @@ class MarkerRegression(object): self.geno_db_exists = start_vars['geno_db_exists'] else: try: - geno_dataset = data_set.create_dataset(self.dataset.group.name + "Geno") self.geno_db_exists = "True" except: self.geno_db_exists = "False" @@ -278,7 +276,6 @@ class MarkerRegression(object): ) else: - self.cutoff = 2 self.qtl_results = [] highest_chr = 1 #This is needed in order to convert the highest chr to X/Y for marker in results: @@ -426,7 +423,6 @@ class MarkerRegression(object): if self.dataset.group.species == "human": p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid) - #p_values = self.trim_results(p_values) else: logger.debug("NOW CWD IS:", os.getcwd()) @@ -478,8 +474,6 @@ class MarkerRegression(object): json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) results = json.loads(json_results[1]) p_values = [float(result) for result in results['p_values']] - #logger.debug("p_values:", p_values[:10]) - #p_values = self.trim_results(p_values) t_stats = results['t_stats'] #t_stats, p_values = lmm.run( @@ -493,19 +487,8 @@ class MarkerRegression(object): self.dataset.group.markers.add_pvalues(p_values) - #self.get_lod_score_cutoff() - return self.dataset.group.markers.markers - def trim_results(self, p_values): - logger.debug("len_p_values:", len(p_values)) - if len(p_values) > 500: - p_values.sort(reverse=True) - trimmed_values = p_values[:500] - - return trimmed_values - - #def gen_human_results(self, pheno_vector, tempdata): def gen_human_results(self, pheno_vector, key, temp_uuid): file_base = locate(self.dataset.group.name,"mapping") @@ -562,18 +545,6 @@ class MarkerRegression(object): return p_values, t_stats - def get_lod_score_cutoff(self): - logger.debug("INSIDE GET LOD CUTOFF") - high_qtl_count = 0 - for marker in self.dataset.group.markers.markers: - if marker['lod_score'] > 1: - high_qtl_count += 1 - - if high_qtl_count > 1000: - return 1 - else: - return 0 - def identify_empty_samples(self): no_val_samples = [] for sample_count, val in enumerate(self.vals): @@ -597,28 +568,6 @@ class MarkerRegression(object): trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data -def create_snp_iterator_file(group): - """ - This function is only called by main below - """ - raise Exception("Paths are undefined here") - plink_file_base = os.path.join(TMPDIR, group) - plink_input = input.plink(plink_file_base, type='b') - - data = dict(plink_input = list(plink_input), - numSNPs = plink_input.numSNPs) - - #input_dict = {} - # - #input_dict['plink_input'] = list(plink_input) - #input_dict['numSNPs'] = plink_input.numSNPs - # - - snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz") - - with gzip.open(snp_file_base, "wb") as fh: - pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL) - def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type): with open(results_path, "w+") as output_file: output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") @@ -652,8 +601,6 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write("\n") def trim_markers_for_table(markers): - num_markers = len(markers) - if 'lod_score' in markers[0].keys(): sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True) else: @@ -664,8 +611,4 @@ def trim_markers_for_table(markers): trimmed_sorted_markers = sorted_markers[:200] return trimmed_sorted_markers else: - return sorted_markers - - -if __name__ == '__main__': - import cPickle as pickle + return sorted_markers
\ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py index 66884b0c..da713325 100644 --- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py +++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py @@ -30,7 +30,7 @@ from math import * import piddle as pid import sys,os import cPickle -import httplib, urllib +import httplib from flask import Flask, g @@ -52,7 +52,6 @@ logger = utility.logger.getLogger(__name__ ) ######################################### class MarkerRegression(object): cMGraphInterval = 5 - maxBootStrap = 50 GRAPH_MIN_WIDTH = 900 GRAPH_MAX_WIDTH = 10000 # Don't set this too high GRAPH_DEFAULT_WIDTH = 1280 @@ -81,8 +80,6 @@ class MarkerRegression(object): DRAW_DETAIL_MB = 4 DRAW_UTR_LABELS_MB = 4 - MIN_PIXELS_BETWEEN_LABELS = 50 - qmarkImg = HT.Image('/images/qmarkBoxBlue.gif', width=10, height=13, border=0, alt='Glossary') # Note that "qmark.gif" is a similar, smaller, rounded-edges question mark. It doesn't look # like the ones on the image, though, which is why we don't use it here. @@ -93,11 +90,8 @@ class MarkerRegression(object): NR_INDIVIDUALS = 0 ## END HaplotypeAnalyst - ALEX_DEBUG_BOOL_COLORIZE_GENES = 1 # 0=don't colorize, 1=colorize ALEX_DEBUG_BOOL_PRINT_GENE_LIST = 1 - kWIDTH_DEFAULT=1 - kONE_MILLION = 1000000 LODFACTOR = 4.61 @@ -105,18 +99,14 @@ class MarkerRegression(object): SNP_COLOR = pid.orange # Color for the SNP "seismograph" TRANSCRIPT_LOCATION_COLOR = pid.mediumpurple - GENE_FILL_COLOR = pid.HexColor(0x6666FF) - GENE_OUTLINE_COLOR = pid.HexColor(0x000077) BOOTSTRAP_BOX_COLOR = pid.yellow LRS_COLOR = pid.HexColor(0x0000FF) - LRS_LINE_WIDTH = 2 SIGNIFICANT_COLOR = pid.HexColor(0xEBC7C7) SUGGESTIVE_COLOR = pid.gainsboro SIGNIFICANT_WIDTH = 5 SUGGESTIVE_WIDTH = 5 ADDITIVE_COLOR_POSITIVE = pid.green ADDITIVE_COLOR_NEGATIVE = pid.orange - ADDITIVE_COLOR = ADDITIVE_COLOR_POSITIVE DOMINANCE_COLOR_POSITIVE = pid.darkviolet DOMINANCE_COLOR_NEGATIVE = pid.red @@ -127,15 +117,7 @@ class MarkerRegression(object): HAPLOTYPE_RECOMBINATION = pid.darkgray ## END HaplotypeAnalyst - QMARK_EDGE_COLOR = pid.HexColor(0x718118) - QMARK_FILL_COLOR = pid.HexColor(0xDEE3BB) - TOP_RIGHT_INFO_COLOR = pid.black - X_AXIS_LABEL_COLOR = pid.black #HexColor(0x505050) - - MINI_VIEW_MAGNIFIED_REGION_COLOR = pid.HexColor(0xCC0000) - MINI_VIEW_OUTSIDE_REGION_COLOR = pid.HexColor(0xEEEEEE) - MINI_VIEW_BORDER_COLOR = pid.black CLICKABLE_WEBQTL_REGION_COLOR = pid.HexColor(0xF5D3D3) CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR = pid.HexColor(0xFCE9E9) @@ -154,18 +136,9 @@ class MarkerRegression(object): HELP_PAGE_REF = '/glossary.html' - DRAW_UTR_LABELS=0 - def __init__(self, start_vars): - - #templatePage.__init__(self, fd) - - #if not self.openMysql(): - # return logger.info("Running qtlreaper") - #helper_functions.get_species_dataset_trait(self, start_vars) - self.temp_uuid = start_vars['temp_uuid'] self.dataset = start_vars['dataset'] @@ -190,21 +163,6 @@ class MarkerRegression(object): self.js_data = start_vars['js_data'] self.trimmed_markers = start_vars['trimmed_markers'] #Top markers to display in table - #ZS: Think I can just get all this from dataset object now - #RISet and Species - #if not fd.genotype: - # fd.readGenotype() - # - #fd.parentsf14regression = fd.formdata.getvalue('parentsf14regression') - # - #if ((fd.parentsf14regression == 'on') and fd.genotype_2): - # fd.genotype = fd.genotype_2 - #else: - # fd.genotype = fd.genotype_1 - #fd.strainlist = list(fd.genotype.prgy) - # - #self.species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, RISet=fd.RISet) - if self.dataset.group.species == "rat": self._ucscDb = "rn3" elif self.dataset.group.species == "mouse": @@ -212,7 +170,6 @@ class MarkerRegression(object): else: self._ucscDb = "" - ##################################### # Options ##################################### @@ -265,17 +222,7 @@ class MarkerRegression(object): if 'use_loco' in start_vars.keys(): self.use_loco = start_vars['use_loco'] - #try: self.selectedChr = int(start_vars['selected_chr']) - #except: - # self.selectedChr = -1 - - #whether include parents and F1 for InbredSet - #fd.parentsf14regression = fd.formdata.getvalue('parentsf14regression') - #if ((fd.parentsf14regression == 'on') and fd.genotype_2): - # fd.genotype = fd.genotype_2 - #else: - # fd.genotype = fd.genotype_1 self.strainlist = self.dataset.group.samplelist self.genotype = self.dataset.group.read_genotype_file() @@ -295,7 +242,6 @@ class MarkerRegression(object): self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH ## BEGIN HaplotypeAnalyst - #self.haplotypeAnalystChecked = fd.formdata.getvalue('haplotypeAnalystCheck') if 'haplotypeAnalystCheck' in start_vars.keys(): self.haplotypeAnalystChecked = start_vars['haplotypeAnalystCheck'] else: @@ -308,7 +254,6 @@ class MarkerRegression(object): self.LRS_LOD = start_vars['LRSCheck'] else: self.LRS_LOD = start_vars['score_type'] - self.cutoff = start_vars['cutoff'] self.intervalAnalystChecked = True self.draw2X = False if 'additiveCheck' in start_vars.keys(): @@ -340,34 +285,8 @@ class MarkerRegression(object): except: self.lrsMax = 0 - #self.additiveChecked = fd.formdata.getvalue('additiveCheck') - #self.dominanceChecked = fd.formdata.getvalue('dominanceCheck') - #self.LRS_LOD = fd.formdata.getvalue('LRSCheck', 'LRS') - #self.intervalAnalystChecked = fd.formdata.getvalue('intervalAnalystCheck') - #self.legendChecked = fd.formdata.getvalue('viewLegend') - #self.geneChecked = fd.formdata.getvalue('showGenes') - #self.SNPChecked = fd.formdata.getvalue('showSNP') - #self.draw2X = fd.formdata.getvalue('draw2X') - #self.lrsMax = float(fd.formdata.getvalue('lrsMax', 0)) - #self.startMb = fd.formdata.getvalue('startMb', "-1") - #self.endMb = fd.formdata.getvalue('endMb', "-1") - - #try: - # self.startMb = float(self.startMb) - # self.endMb = float(self.endMb) - # if self.startMb > self.endMb: - # temp = self.startMb - # self.startMb = self.endMb - # self.endMb = temp - # #minimal distance 10bp - # if self.endMb - self.startMb < 0.00001: - # self.endMb = self.startMb + 0.00001 - #except: - # self.startMb = self.endMb = -1 - #Trait Infos self.identification = "" - #self.identification = fd.formdata.getvalue('identification', "") ################################################################ # Generate Chr list and Retrieve Length Information @@ -406,51 +325,10 @@ class MarkerRegression(object): else: self.GraphInterval = self.cMGraphInterval #cM - ################################################################ - # Get Trait Values and Infomation - ################################################################ - ##input from search page or selection page - #self.searchResult = fd.formdata.getvalue('searchResult') - ##convert single selection into a list - #if type("1") == type(self.searchResult): - # self.searchResult = string.split(self.searchResult,'\t') - # - #self.traitList = [] - #if self.searchResult and len(self.searchResult) > webqtlConfig.MULTIPLEMAPPINGLIMIT: - # heading = 'Multiple Interval Mapping' - # detail = ['In order to get clear result, do not select more than %d traits for \ - # Multiple Interval Mapping analysis.' % webqtlConfig.MULTIPLEMAPPINGLIMIT] - # self.error(heading=heading,detail=detail) - # return - #elif self.searchResult: - # self.dataSource = 'selectionPage' - # for item in self.searchResult: - # thisTrait = webqtlTrait(fullname=item, cursor=self.cursor) - # thisTrait.retrieveInfo() - # thisTrait.retrieveData(fd.strainlist) - # self.traitList.append(thisTrait) - #else: - - #input from data editing page - #fd.readData() - #if not fd.allTraitData: - # heading = "Mapping" - # detail = ['No trait data was selected for %s data set. No mapping attempted.' % fd.RISet] - # self.error(heading=heading,detail=detail) - # return - - self.dataSource = 'editingPage' self.traitList = [] thisTrait = start_vars['this_trait'] - #fullname = fd.formdata.getvalue('fullname', '') - #if fullname: - # thisTrait = webqtlTrait(fullname=fullname, data=fd.allTraitData, cursor=self.cursor) - # thisTrait.retrieveInfo() - #else: - # thisTrait = webqtlTrait(data=fd.allTraitData) self.traitList.append(thisTrait) - ## BEGIN HaplotypeAnalyst ## count the amount of individuals to be plotted, and increase self.graphHeight if self.haplotypeAnalystChecked and self.selectedChr > -1: @@ -468,8 +346,6 @@ class MarkerRegression(object): self.NR_INDIVIDUALS = self.NR_INDIVIDUALS + 1 # default: self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS+10) * self.EACH_GENE_HEIGHT -## for paper: - # #self.graphHeight = self.graphHeight + 1 * self.NR_INDIVIDUALS * self.EACH_GENE_HEIGHT - 180 ## END HaplotypeAnalyst ################################################################ @@ -477,12 +353,6 @@ class MarkerRegression(object): ################################################################ self.multipleInterval = len(self.traitList) > 1 self.qtlresults = start_vars['qtl_results'] - #errorMessage = self.calculateAllResult(fd) - #if errorMessage: - # heading = "Mapping" - # detail = ['%s' % errorMessage] - # self.error(heading=heading,detail=detail) - # return if self.multipleInterval: self.colorCollection = Plot.colorSpectrum(len(self.qtlresults)) @@ -511,9 +381,6 @@ class MarkerRegression(object): for i, strain in enumerate(self.diffCol): self.diffCol[i] = g.db.execute("select Id from Strain where Symbol = %s", strain).fetchone()[0] - #self.cursor.execute("select Id from Strain where Symbol = %s", strain) - #self.diffCol[i] = self.cursor.fetchone()[0] - #print self.diffCol ################################################################ # GeneCollection goes here @@ -536,13 +403,13 @@ class MarkerRegression(object): chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "mouse") + self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "mouse") elif self.dataset.group.species == "rat": if self.selectedChr == 21: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "rat") + self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "rat") if self.geneCol and self.intervalAnalystChecked: ####################################################################### @@ -551,26 +418,13 @@ class MarkerRegression(object): #through set GENEID is None # ####################################################################### - #GENEID = fd.formdata.getvalue('GeneId') or None GENEID = None - geneTableContainer = HT.Div(Id="sortable") #Div to hold table self.geneTable(self.geneCol, GENEID) - #geneTable = self.geneTable(self.geneCol, GENEID) - #geneTableContainer.append(geneTable) - - #mainfmName = webqtlUtil.genRandStr("fm_") - #tableForm = HT.Form(cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name=mainfmName, submit=HT.Input(type='hidden')) - #tableForm.append(HT.Input(name='FormID', value='', type='hidden')) - #tableForm.append(geneTableContainer) - ################################################################ # Plots goes here ################################################################ - #if self.plotScale != 'physic' or self.multipleInterval: - # showLocusForm = webqtlUtil.genRandStr("fm_") - #else: showLocusForm = "" intCanvas = pid.PILCanvas(size=(self.graphWidth, self.graphHeight)) gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) @@ -586,23 +440,6 @@ class MarkerRegression(object): intCanvasX2 = pid.PILCanvas(size=(self.graphWidth*2,self.graphHeight*2)) gifmapX2 = self.plotIntMapping(intCanvasX2, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm, zoom=2) intCanvasX2.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename+"X2"), format='png') - #DLintImgX2=HT.Href(text='Download',url = '/image/'+self.filename+'X2.png', Class='smallsize', target='_blank') - - #textUrl = self.writeQTL2Text(fd, self.filename) - - ################################################################ - # Info tables goes here - ################################################################ - #traitInfoTD = self.traitInfoTD(fd) - - #if self.draw2X: - # traitInfoTD.append(HT.P(), DLintImgX2, ' a higher resolution 2X image. ') - #else: - # traitInfoTD.append(HT.P()) - #if textUrl: - # traitInfoTD.append(HT.BR(), textUrl, ' results in tab-delimited text format.') - #traitRemapTD = self.traitRemapTD(self.cursor, fd) - #topTable = HT.TableLite(HT.TR(traitInfoTD, HT.TD(" ", width=25), traitRemapTD), border=0, cellspacing=0, cellpadding=0) ################################################################ # Outputs goes here @@ -620,158 +457,15 @@ class MarkerRegression(object): if (self.permChecked and self.nperm > 0) and not (self.multipleInterval and 0 < self.nperm): self.perm_filename = self.drawPermutationHistogram() - #perm_text_file = self.permutationTextFile() ################################################################ # footnote goes here ################################################################ btminfo = HT.Paragraph(Id="smallsize") #Small('More information about this graph is available here.') - #if (self.additiveChecked): - # btminfo.append(HT.BR(), 'A positive additive coefficient (', HT.Font('green', color='green'), ' line) indicates that %s alleles increase trait values. In contrast, a negative additive coefficient (' % fd.ppolar, HT.Font('red', color='red'), ' line) indicates that %s alleles increase trait values.' % fd.mpolar) - if self.traitList and self.traitList[0].dataset and self.traitList[0].dataset.type == 'Geno': btminfo.append(HT.BR(), 'Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') - #if self.permChecked and not self.multipleInterval and 0 < self.nperm: - # TD_LR = HT.TD(HT.Blockquote(gifmap, showLocusForm, HT.P(), btminfo, HT.P(), perm_histogram, HT.P(), perm_text_file), bgColor='#eeeeee', height = 200) - # #TD_LR = HT.TD(HT.Blockquote(topTable), HT.Blockquote(gifmap, showLocusForm, HT.P(), btminfo, HT.P(), perm_histogram, HT.P(), perm_text_file), bgColor='#eeeeee', height = 200) - #else: - TD_LR = HT.TD(HT.Blockquote(gifmap, showLocusForm, HT.P(), btminfo), bgColor='#eeeeee', height = 200) - #TD_LR = HT.TD(HT.Blockquote(topTable), HT.Blockquote(gifmap, showLocusForm, HT.P(), btminfo, HT.P(), perm_histogram, HT.P(), perm_text_file), bgColor='#eeeeee', height = 200) - - - if geneTable: - iaForm = HT.Form(cgi= os.path.join(webqtlConfig.CGIDIR, "main.py?FormID=intervalAnalyst"), enctype='multipart/form-data', - name="iaForm", submit=HT.Input(type='hidden')) - hddn = {'chromosome':self.genotype[0].name, 'species':self.species,'startMb':self.startMb,'endMb':self.endMb} - if self.diffCol: - hddn['s1'] = self.diffCol[0] - hddn['s2'] = self.diffCol[1] - for key in hddn.keys(): - iaForm.append(HT.Input(name=key, value=hddn[key], type='hidden')) - iaForm.append(HT.Paragraph("Interval Analyst : Chr %s from %2.6f to %2.6f Mb" % (self.genotype[0].name, self.startMb, self.endMb), - HT.Input(name='customize', value='Customize', onClick= "formInNewWindow(this.form);", type='button', Class="button"), Class="subtitle")) - TD_LR.append(HT.Blockquote(iaForm)) - # optionsTable - selectall = HT.Href(url="#redirect", onClick="checkAll(document.getElementsByName('%s')[0]);" % mainfmName) - selectall_img = HT.Image("/images/select_all2_final.jpg", name="selectall", alt="Select All", title="Select All", style="border:none;") - selectall.append(selectall_img) - reset = HT.Href(url="#redirect", onClick="checkNone(document.getElementsByName('%s')[0]); return false;" % mainfmName) - reset_img = HT.Image("/images/select_none2_final.jpg", alt="Select None", title="Select None", style="border:none;") - reset.append(reset_img) - selectinvert = HT.Href(url="#redirect", onClick = "checkInvert(document.getElementsByName('%s')[0]);" % mainfmName) - selectinvert_img = HT.Image("/images/invert_selection2_final.jpg", name="selectinvert", alt="Invert Selection", title="Invert Selection", style="border:none;") - selectinvert.append(selectinvert_img) - addselect = HT.Href(url="#redirect", onClick="addRmvSelection('%s', document.getElementsByName('%s')[0], 'addToSelection');" % (RISet, mainfmName)) - addselect_img = HT.Image("/images/add_collection1_final.jpg", name="addselect", alt="Add To Collection", title="Add To Collection", style="border:none;") - addselect.append(addselect_img) - geneweaver = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'ODEIM');" % mainfmName) - geneweaver_img = HT.Image("/images/ODE_logo_final.jpg", name="GeneWeaver", alt="Gene Weaver", title="Gene Weaver", style="border:none") - geneweaver.append(geneweaver_img) - optionsTable = HT.TableLite() - optionsTable.append(HT.TR( - HT.TD(selectall, width="77", style="text-align:center"), - HT.TD(reset, width="77", style="text-align:center"), - HT.TD(selectinvert, width="77", style="text-align:center"), - HT.TD(geneweaver, width="77", style="text-align:center"), - )) - optionsTable.append(HT.TR( - HT.TD("Select", style="text-align:center"), - HT.TD("Deselect", style="text-align:center"), - HT.TD("Invert", style="text-align:center"), - HT.TD("Gene Weaver", style="text-align:center"), - )) - TD_LR.append(HT.Blockquote(optionsTable)) - # geneTableContainer - TD_LR.append(HT.Blockquote(tableForm)) - - self.body = TD_LR - - #self.dict['body'] = TD_LR - #self.dict['title'] = "Mapping" - - - def writeQTL2Text(self, filename): - if self.multipleInterval: - return "" - #_dominance = (self.genotype.type == 'intercross') - _Mb = self.genotype.Mbmap - - ###Write to text file - fpText = open(os.path.join(webqtlConfig.TMPDIR, filename) + '.txt','wb') - - fpText.write("Source: WebQTL, The GeneNetwork (%s)\n" % webqtlConfig.PORTADDR) - # - fpText.write("Site: GN\n") - fpText.write("Page: Map Viewer\n") - fpText.write(time.strftime("Date and Time (US Center): %b %d, %Y at %I.%M %p\n", time.localtime())) - fpText.write("Trait ID: %s\n" % self.this_trait.name) - fpText.write("Suggestive LRS = %0.2f\n" % self.suggestive) - fpText.write("Significant LRS = %0.2f\n" % self.significant) - """ - if self.this_trait.symbol and self.this_trait.chr and self.this_trait.mb: - writeSymbol, writeChromosome, writeMb = self.this_trait.symbol, self.this_trait.chr, self.this_trait.mb - else: - writeSymbol, writeChromosome, writeMb = (" ", " ", " ") - fpText.write("Gene Symbol: %s\n" % writeSymbol) - fpText.write("Location: Chr %s @ %s Mb\n" % (writeChromosome, writeMb)) - #selectedChr = self.indexToChrName(int(fd.formdata.getvalue('chromosomes', -1))) - #fpText.write("Chromosome: %s\n" % selectedChr) - fpText.write("Region: %0.6f-%0.6f Mb\n\n" % (self.startMb, self.endMb)) - """ - - if hasattr(self, 'LRSArray'): - if _dominance: - fpText.write('Chr\tLocus\tcM\tMb\tLRS\tP-value\tAdditive\tDominance\n') - else: - fpText.write('Chr\tLocus\tcM\tMb\tLRS\tP-value\tAdditive\n') - else: - if _dominance: - fpText.write('Chr\tLocus\tcM\tMb\tLRS\tAdditive\tDominance\n') - else: - fpText.write('Chr\tLocus\tcM\tMb\tLRS\tAdditive\n') - - i = 0 - for marker in self.qtlresults: - if _Mb: - locusMb = '%2.3f' % marker['Mb'] - else: - locusMb = 'N/A' - - if hasattr(self, 'LRSArray'): - if start_vars['score_type'] == "LRS": - lrs_lod = marker['lrs_value'] - else: - lrs_lod = marker['lod_score'] - - P_value = self.calculatePValue(lrs_lod, self.perm_output) - - #if _dominance: - # fpText.write("%s\t%s\t%2.3f\t%s\t%2.3f\t%2.3f\t%2.3f\t%2.3f\n" %(qtlresult.locus.chr, \ - # qtlresult.locus.name, qtlresult.locus.cM, locusMb , qtlresult.lrs, P_value, qtlresult.additive, qtlresult.dominance)) - #else: - if P_value: - fpText.write("%s\t%s\t%2.3f\t%s\t%2.3f\t%2.3f\n" %(marker['chr'], \ - marker['name'], marker['cM'], locusMb, lrs_lod, P_value)) - else: - fpText.write("%s\t%s\t%2.3f\t%s\t%2.3f\t%s\n" %(marker['chr'], \ - marker['name'], marker['cM'], locusMb , lrs_lod, '-')) - else: - #if _dominance: - # fpText.write("%s\t%s\t%2.3f\t%s\t%2.3f\t%2.3f\t%2.3f\n" %(qtlresult.locus.chr, \ - # qtlresult.locus.name, qtlresult.locus.cM, locusMb , qtlresult.lrs, qtlresult.additive, qtlresult.dominance)) - #else: - fpText.write("%s\t%s\t%2.3f\t%s\t%2.3f\n" %(marker['chr'], \ - marker['name'], marker['cM'], locusMb , lrs_lod)) - - i += 1 - - fpText.close() - textUrl = '/tmp/'+filename+'.txt' - #textUrl = HT.Href(text = 'Download', url= '/tmp/'+filename+'.txt', target = "_blank", Class='smallsize') - return textUrl - def plotIntMapping(self, canvas, offset= (80, 120, 20, 100), zoom = 1, startMb = None, endMb = None, showLocusForm = ""): #calculating margins xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset @@ -798,8 +492,6 @@ class MarkerRegression(object): cHeight = canvas.size[1] plotWidth = cWidth - xLeftOffset - xRightOffset plotHeight = cHeight - yTopOffset - yBottomOffset - startPixelX = xLeftOffset - endPixelX = (xLeftOffset + plotWidth) #Drawing Area Height drawAreaHeight = plotHeight @@ -823,7 +515,6 @@ class MarkerRegression(object): #Image map gifmap = HT.Map(name = "WebQTLImageMap") - #gifmap = None newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) # Draw the alternating-color background first and get plotXScale @@ -1002,15 +693,6 @@ class MarkerRegression(object): this_chr = str(self.ChrList[self.selectedChr][0]) else: this_chr = str(self.ChrList[self.selectedChr][1]+1) - # for i, qtlresult in enumerate(self.qtlresults): - # if Chr == this_chr: - # if Mb < self.startMb or Mb > self.endMb: - # return - # else: - # locPixel = xLeftOffset + (Mb-self.startMb)*plotXScale - # break - # elif self.selectedChr == -1: - # if str(qtlresult['chr']) != Chr: if self.plotScale == 'physic': if self.selectedChr > -1: @@ -1068,7 +750,6 @@ class MarkerRegression(object): canvas.drawPolygon(((leftOffset+6, startPosY-6), (leftOffset, startPosY+6), (leftOffset+12, startPosY+6)), edgeColor=pid.black, fillColor=self.TRANSCRIPT_LOCATION_COLOR, closed=1) canvas.drawString("Sequence Site", (leftOffset+15), (startPosY+5), smallLabelFont, self.TOP_RIGHT_INFO_COLOR) - def drawSNPTrackNew(self, canvas, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): if self.plotScale != 'physic' or self.selectedChr == -1 or not self.diffCol: return @@ -1108,7 +789,7 @@ class MarkerRegression(object): snpDensity = float(SNPCounts[i-xLeftOffset]*SNP_HEIGHT_MODIFIER/maxCount) canvas.drawLine(i, drawSNPLocationY+(snpDensity)*zoom, i, drawSNPLocationY-(snpDensity)*zoom, color=self.SNP_COLOR, width=1) - def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset= (40, 120, 80, 10), zoom = 1, locLocation= None): + def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset= (40, 120, 80, 10), zoom = 1): nameWidths = [] yPaddingTop = 10 colorFont=pid.Font(ttf="trebuc",size=12,bold=1) @@ -1136,14 +817,12 @@ class MarkerRegression(object): canvas.drawRect(rightShift,yPaddingTop+kstep*15, rectWidth+rightShift,yPaddingTop+10+kstep*15, fillColor=thisLRSColor) canvas.drawString(name,rectWidth+2+rightShift,yPaddingTop+10+kstep*15,font=colorFont,color=pid.black) if thisTrait.db: - COORDS = "%d,%d,%d,%d" %(rectWidth+2+rightShift,yPaddingTop+kstep*15,rectWidth+2+rightShift+nameWidth,yPaddingTop+10+kstep*15,) HREF= "javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm, thisTrait.db.name, thisTrait.name) Areas = HT.Area(shape='rect',coords=COORDS,href=HREF) gifmap.areas.append(Areas) - - def drawLegendPanel(self, canvas, offset= (40, 120, 80, 10), zoom = 1, locLocation= None): + def drawLegendPanel(self, canvas, offset= (40, 120, 80, 10), zoom = 1): xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset plotHeight = canvas.size[1] - yTopOffset - yBottomOffset @@ -1187,31 +866,43 @@ class MarkerRegression(object): startPosX = xLeftOffset canvas.drawLine(startPosX, startPosY, startPosX + 32, startPosY, color=self.SIGNIFICANT_COLOR, width=self.SIGNIFICANT_WIDTH) canvas.drawLine(startPosX, startPosY + stepPosY, startPosX + 32, startPosY + stepPosY, color=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH) - lod = 1 - if self.LRS_LOD == 'LOD': - lod = self.LODFACTOR canvas.drawString('Significant %s = %2.2f' % (self.LRS_LOD, self.significant),xLeftOffset+42,startPosY +5,font=labelFont,color=pid.black) canvas.drawString('Suggestive %s = %2.2f' % (self.LRS_LOD, self.suggestive),xLeftOffset+42,startPosY + 5 +stepPosY,font=labelFont,color=pid.black) - labelFont=pid.Font(ttf="verdana",size=12*fontZoom) + labelFont = pid.Font(ttf="verdana",size=12*fontZoom) labelColor = pid.black if self.selectedChr == -1: string1 = 'Mapping for Dataset: %s, mapping on All Chromosomes' % self.dataset.group.name else: string1 = 'Mapping for Dataset: %s, mapping on Chromosome %s' % (self.dataset.group.name, self.ChrList[self.selectedChr][0]) - if self.controlLocus and self.doControl != "false": - string2 = 'Using %s as control' % self.controlLocus + + string3 = '' + if self.mapping_method == "gemma" or self.mapping_method == "gemma_bimbam": + if self.use_loco == "True": + string2 = 'Using GEMMA mapping method with LOCO and ' + else: + string2 = 'Using GEMMA mapping method with ' + if self.covariates != "": + string2 += 'the cofactors below:' + cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + string3 = cofactor_names + else: + string2 += 'no cofactors' + elif self.mapping_method == "rqtl_plink" or self.mapping_method == "rqtl_geno": + string2 = 'Using R/qtl mapping method with ' + if self.controlLocus and self.doControl != "false": + string2 += '%s as control' % self.controlLocus + else: + string2 += 'no control for other QTLs' + elif self.mapping_method == "plink": + string2 = 'Using PLINK mapping method with no control for other QTLs' else: - if self.mapping_method == "gemma" or self.mapping_method == "gemma_bimbam": - string2 = 'Using GEMMA mapping method with no control for other QTLs.' - if self.covariates != "": - string3 = 'Using following traits as covariates: ' + self.covariates - elif self.mapping_method == "rqtl_plink" or self.mapping_method == "rqtl_geno": - string2 = 'Using R/qtl mapping method with no control for other QTLs.' - elif self.mapping_method == "plink": - string2 = 'Using PLINK mapping method with no control for other QTLs.' + string2 = 'Using Haldane mapping function with ' + if self.controlLocus and self.doControl != "false": + string2 += '%s as control' % self.controlLocus else: - string2 = 'Using Haldane mapping function with no control for other QTLs' + string2 += 'no control for other QTLs' + if self.this_trait.name: identification = "Trait ID: %s : %s" % (self.dataset.fullname, self.this_trait.name) d = 4+ max(canvas.stringWidth(identification, font=labelFont), canvas.stringWidth(string1, font=labelFont), canvas.stringWidth(string2, font=labelFont)) @@ -1220,6 +911,8 @@ class MarkerRegression(object): d = 4+ max(canvas.stringWidth(string1, font=labelFont), canvas.stringWidth(string2, font=labelFont)) canvas.drawString(string1,canvas.size[0] - xRightOffset-d,35*fontZoom,font=labelFont,color=labelColor) canvas.drawString(string2,canvas.size[0] - xRightOffset-d,50*fontZoom,font=labelFont,color=labelColor) + if string3 != '': + canvas.drawString(string3,canvas.size[0] - xRightOffset-d,65*fontZoom,font=labelFont,color=labelColor) def drawGeneBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): @@ -1236,9 +929,6 @@ class MarkerRegression(object): yPaddingTop = yTopOffset - displayStartInBases = startMb*self.kONE_MILLION - displayEndInBases = endMb*self.kONE_MILLION - for gIndex, theGO in enumerate(self.geneCol): geneNCBILink = 'http://www.ncbi.nlm.nih.gov/gene?term=%s' if self.dataset.group.species == "mouse": @@ -1253,7 +943,6 @@ class MarkerRegression(object): cdsStart = theGO['cdsStart'] cdsEnd = theGO['cdsEnd'] accession = theGO['NM_ID'] - geneId = theGO['GeneID'] geneSymbol = theGO["GeneSymbol"] strand = theGO["Strand"] exonCount = theGO["exonCount"] @@ -1271,10 +960,7 @@ class MarkerRegression(object): geneStartPix = xLeftOffset; # clip the first in-range gene #color the gene based on SNP density - - #found earlier, needs to be recomputed as snps are added - #always apply colors now, even if SNP Track not checked - Zach 11/24/2010 densities=[1.0000000000000001e-05, 0.094094033555233408, 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] @@ -1307,7 +993,6 @@ class MarkerRegression(object): txEnd = theGO["TxEnd"] cdsStart = theGO["TxStart"] cdsEnd = theGO["TxEnd"] - geneId = theGO["GeneID"] geneSymbol = theGO["GeneSymbol"] strand = theGO["Strand"] exonCount = 0 @@ -1336,11 +1021,7 @@ class MarkerRegression(object): #Draw Genes geneYLocation = yPaddingTop + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT*zoom - - if 1:#drawClickableRegions: - geneYLocation += self.UCSC_BAND_HEIGHT + self.BAND_SPACING + self.ENSEMBL_BAND_HEIGHT + self.BAND_SPACING + self.WEBQTL_BAND_HEIGHT + self.BAND_SPACING - else: - geneYLocation += self.BAND_SPACING + geneYLocation += self.UCSC_BAND_HEIGHT + self.BAND_SPACING + self.ENSEMBL_BAND_HEIGHT + self.BAND_SPACING + self.WEBQTL_BAND_HEIGHT + self.BAND_SPACING #draw the detail view if self.endMb - self.startMb <= self.DRAW_DETAIL_MB and geneEndPix - geneStartPix > self.EACH_GENE_ARROW_SPACING * 3: @@ -1348,7 +1029,6 @@ class MarkerRegression(object): arrowColor = pid.Color(0.7, 0.7, 0.7) #draw the line that runs the entire length of the gene - #canvas.drawString(str(geneStartPix), 300, 400) canvas.drawLine(geneStartPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom, geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom, color=outlineColor, width=1) #draw the arrows @@ -1398,7 +1078,6 @@ class MarkerRegression(object): utrStartPix = xLeftOffset + plotWidth #canvas.drawRect(utrStartPix, geneYLocation, utrEndPix, (geneYLocation+self.EACH_GENE_HEIGHT*zoom), edgeColor=utrColor, fillColor =utrColor) - #if self.DRAW_UTR_LABELS and self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: if self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: if strand == "-": labelText = "3'" @@ -1420,7 +1099,6 @@ class MarkerRegression(object): utrStartPix = xLeftOffset + plotWidth #canvas.drawRect(utrStartPix, geneYLocation, utrEndPix, (geneYLocation+self.EACH_GENE_HEIGHT*zoom), edgeColor=utrColor, fillColor =utrColor) - #if self.DRAW_UTR_LABELS and self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: if self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: if strand == "-": labelText = "5'" @@ -1441,8 +1119,6 @@ class MarkerRegression(object): if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return - fpText = open(os.path.join(webqtlConfig.TMPDIR, "hallo") + '.txt','wb') - clickableRegionLabelFont=pid.Font(ttf="verdana", size=9, bold=0) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset @@ -1450,13 +1126,9 @@ class MarkerRegression(object): plotHeight = canvas.size[1] - yTopOffset - yBottomOffset yZero = canvas.size[1] - yBottomOffset fontZoom = zoom - widthMultiplier = 1 yPaddingTop = yTopOffset - exprdrawn = 0 - - #thisTrait = self.traitList[0] thisTrait = self.this_trait _strains, _vals, _vars, _aliases = thisTrait.export_informative() @@ -1466,7 +1138,6 @@ class MarkerRegression(object): temp = GeneralObject(name=_strains[ii], value=_val) smd.append(temp) - smd.sort(lambda A, B: cmp(A.value, B.value)) smd.reverse() @@ -1491,13 +1162,9 @@ class MarkerRegression(object): drawit = 0; if drawit == 1: - if self.genotype[0][i].name != " - " : - plotRight = geneEndPix + 4 - - #### end find out PlotRight firstGene = 1 @@ -1552,11 +1219,7 @@ class MarkerRegression(object): #Draw Genes geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * (self.EACH_GENE_HEIGHT)*zoom - - if 1:#drawClickableRegions: - geneYLocation += self.UCSC_BAND_HEIGHT + self.BAND_SPACING + self.ENSEMBL_BAND_HEIGHT + self.BAND_SPACING + self.WEBQTL_BAND_HEIGHT + self.BAND_SPACING - else: - geneYLocation += self.BAND_SPACING + geneYLocation += self.UCSC_BAND_HEIGHT + self.BAND_SPACING + self.ENSEMBL_BAND_HEIGHT + self.BAND_SPACING + self.WEBQTL_BAND_HEIGHT + self.BAND_SPACING if self.genotype[0][i].name != " - " : @@ -1643,8 +1306,6 @@ class MarkerRegression(object): canvas.drawString("%s" % (samplelist[j]), (xLeftOffset + plotWidth + 10) , geneYLocation+8+2*ind*self.EACH_GENE_HEIGHT*zoom, font=pid.Font(ttf="verdana", size=12, bold=0), color=pid.black) canvas.drawString("%2.2f" % (expr), (xLeftOffset + plotWidth + 60) , geneYLocation+8+2*ind*self.EACH_GENE_HEIGHT*zoom, font=pid.Font(ttf="verdana", size=12, bold=0), color=pid.black) - fpText.close() - ## END HaplotypeAnalyst def drawClickBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): @@ -1688,7 +1349,6 @@ class MarkerRegression(object): xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1)) WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop+self.WEBQTL_BAND_HEIGHT)) - bandWidth = xBrowse2 - xBrowse1 WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase-webqtlZoomWidth))/1000000.0, (calBase+webqtlZoomWidth)/1000000.0) WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" @@ -1726,7 +1386,6 @@ class MarkerRegression(object): traitFont = pid.Font(ttf="verdana", size=14, bold=0) chrX = xLeftOffset + plotWidth - 2 - canvas.stringWidth("Chr %s" % self.ChrList[self.selectedChr][0], font=chrFont) canvas.drawString("Chr %s" % self.ChrList[self.selectedChr][0], chrX, ensemblPaddingTop-5, font=chrFont, color=pid.gray) - traitX = chrX - 28 - canvas.stringWidth("database", font=traitFont) # end of drawBrowserClickableRegions else: #draw the gray text @@ -1734,7 +1393,6 @@ class MarkerRegression(object): traitFont = pid.Font(ttf="verdana", size=14, bold=0) chrX = xLeftOffset + (plotWidth - canvas.stringWidth("Chr %s" % currentChromosome, font=chrFont))/2 canvas.drawString("Chr %s" % currentChromosome, chrX, 32, font=chrFont, color=pid.gray) - traitX = chrX - 28 - canvas.stringWidth("database", font=traitFont) # end of drawBrowserClickableRegions pass @@ -1761,13 +1419,11 @@ class MarkerRegression(object): xAxisLabelColor = pid.black fontHeight = 12*fontZoom # How tall the font that we're using is spacingFromLabelToAxis = 5 - spacingFromLineToLabel = 3 if self.plotScale == 'physic': strYLoc = yZero + spacingFromLabelToAxis + canvas.fontHeight(MBLabelFont) ###Physical single chromosome view if self.selectedChr > -1: - graphMbWidth = endMb - startMb XScale = Plot.detScale(startMb, endMb) XStart, XEnd, XStep = XScale if XStep < 8: @@ -1793,7 +1449,6 @@ class MarkerRegression(object): canvas.drawString(labelStr, drawStringXc, strYLoc, font=MBLabelFont, color=xAxisLabelColor, angle=0) else: canvas.drawLine(Xc, yZero, Xc, yZero+xMinorTickHeight, color=xAxisTickMarkColor, width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark - # end else ###Physical genome wide view else: @@ -1823,7 +1478,6 @@ class MarkerRegression(object): preLpos = -1 distinctCount = 0.0 - #if len(self.genotype) > 1: if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes for i, _chr in enumerate(self.genotype): thisChr = [] @@ -1932,12 +1586,6 @@ class MarkerRegression(object): #LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. #if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. - #ZS: I'm not sure what this if statement is supposed to do. It appears to work correctly for both LOD and LRS if I just set lodm to 1.0 - # if self.LRS_LOD == 'LRS': - # lodm = self.LODFACTOR - # else: - # lodm = 1.0 - #ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD if self.lrsMax <= 0: #sliding scale if "lrs_value" in self.qtlresults[0]: @@ -2054,10 +1702,6 @@ class MarkerRegression(object): else: if self.additiveChecked: additiveMax = max(map(lambda X : abs(X['additive']), self.qtlresults)) - #if INTERCROSS: - # dominanceMax = max(map(lambda X : abs(X.dominance), self.qtlresults[0])) - #else: - # dominanceMax = -1 lrsEdgeWidth = 2 if zoom == 2: @@ -2076,7 +1720,6 @@ class MarkerRegression(object): startPosX = xLeftOffset for i, qtlresult in enumerate(self.qtlresults): m = 0 - #startPosX = xLeftOffset thisLRSColor = self.colorCollection[0] if qtlresult['chr'] != previous_chr and self.selectedChr == -1: @@ -2123,25 +1766,13 @@ class MarkerRegression(object): startPosX += newStartPosX oldStartPosX = newStartPosX - #startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale - - #for j, _chr in enumerate(self.genotype): #ZS: This is beause the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used if self.plotScale == "physic": this_chr = str(self.ChrList[self.selectedChr][0]) else: this_chr = str(self.ChrList[self.selectedChr][1]+1) if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr: - #AdditiveCoordXY = [] - #DominanceCoordXY = [] - #for k, _locus in enumerate(_chr): Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale - #if self.plotScale == 'physic': - #Xc = startPosX + (_locus.Mb-startMb)*plotXScale - #Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale - #else: - #Xc = startPosX + (_locus.cM-_chr[0].cM)*plotXScale - #Xc = startPosX + (qtlresult['cM']-qtlresult[0]['cM'])*plotXScale # updated by NL 06-18-2011: # fix the over limit LRS graph issue since genotype trait may give infinite LRS; @@ -2165,11 +1796,6 @@ class MarkerRegression(object): Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max else: Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max - #if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': - #if self.qtlresults[j]['lrs_value'] > 460 or self.qtlresults[j]['lrs_value']=='inf': - # Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max - #else: - # Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max if self.manhattan_plot == True: point_color = pid.black @@ -2182,11 +1808,8 @@ class MarkerRegression(object): additiveMax = 0.000001 Yc = yZero - qtlresult['additive']*AdditiveHeightThresh/additiveMax AdditiveCoordXY.append((Xc, Yc)) - # if not self.multipleInterval and INTERCROSS and self.additiveChecked: - # Yc = yZero - qtlresult['dominance']*DominanceHeightThresh/dominanceMax - # DominanceCoordXY.append((Xc, Yc)) + m += 1 - #canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) if self.manhattan_plot != True: canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) @@ -2258,7 +1881,6 @@ class MarkerRegression(object): additiveScale = Plot.detScaleOld(0,additiveMax) additiveStep = (additiveScale[1]-additiveScale[0])/additiveScale[2] additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) - maxAdd = additiveScale[1] addPlotScale = AdditiveHeightThresh/additiveMax additiveAxisList.append(additiveScale[1]) @@ -2350,298 +1972,6 @@ class MarkerRegression(object): return plotXScale - def calculateAllResult(self, fd): - - weightedRegression = fd.formdata.getvalue('applyVarianceSE') - - self.genotype = self.genotype.addinterval() - resultSlice = [] - controlGeno = [] - - if self.multipleInterval: - self.suggestive = 0 - self.significant = 0 - if self.selectedChr > -1: - self.genotype.chromosome = [self.genotype[self.selectedChr]] - else: - #single interval mapping - try: - self.suggestive = float(fd.formdata.getvalue('permSuggestive')) - self.significant = float(fd.formdata.getvalue('permSignificance')) - except: - self.suggestive = None - self.significant = None - - _strains, _vals, _vars = self.traitList[0].exportInformative(weightedRegression) - - if webqtlUtil.ListNotNull(_vars): - pass - else: - weightedRegression = 0 - _strains, _vals, _vars = self.traitList[0].exportInformative() - - ##locate genotype of control Locus - if self.controlLocus: - controlGeno2 = [] - _FIND = 0 - for _chr in self.genotype: - for _locus in _chr: - if _locus.name == self.controlLocus: - controlGeno2 = _locus.genotype - _FIND = 1 - break - if _FIND: - break - if controlGeno2: - _prgy = list(self.genotype.prgy) - for _strain in _strains: - _idx = _prgy.index(_strain) - controlGeno.append(controlGeno2[_idx]) - else: - return "The control marker you selected is not in the genofile." - - if weightedRegression: - self.perm_output = self.genotype.permutation(strains = _strains, trait = _vals, - variance = _vars, nperm=self.nperm) - else: - self.perm_output = self.genotype.permutation(strains = _strains, trait = _vals, - nperm=self.nperm) - - if self.significant and self.suggestive: - pass - else: - if self.nperm < 100: - self.suggestive = 0 - self.significant = 0 - else: - self.suggestive = self.perm_output[int(self.nperm*0.37-1)] - self.significant = self.perm_output[int(self.nperm*0.95-1)] - - #calculating bootstrap - #from now on, genotype could only contain a single chromosome - #permutation need to be performed genome wide, this is not the case for bootstrap - - #due to the design of qtlreaper, composite regression need to be performed genome wide - if not self.controlLocus and self.selectedChr > -1: - self.genotype.chromosome = [self.genotype[self.selectedChr]] - elif self.selectedChr > -1: #self.controlLocus and self.selectedChr > -1 - lociPerChr = map(len, self.genotype) - resultSlice = reduce(lambda X, Y: X+Y, lociPerChr[:self.selectedChr], 0) - resultSlice = [resultSlice,resultSlice+lociPerChr[self.selectedChr]] - else: - pass - - #calculate QTL for each trait - self.qtlresults = [] - - for thisTrait in self.traitList: - _strains, _vals, _vars = thisTrait.exportInformative(weightedRegression) - if self.controlLocus: - if weightedRegression: - qtlresult = self.genotype.regression(strains = _strains, trait = _vals, - variance = _vars, control = self.controlLocus) - else: - qtlresult = self.genotype.regression(strains = _strains, trait = _vals, - control = self.controlLocus) - if resultSlice: - qtlresult = qtlresult[resultSlice[0]:resultSlice[1]] - else: - if weightedRegression: - qtlresult = self.genotype.regression(strains = _strains, trait = _vals, - variance = _vars) - else: - qtlresult = self.genotype.regression(strains = _strains, trait = _vals) - - self.qtlresults.append(qtlresult) - - if not self.multipleInterval: - if self.controlLocus and self.selectedChr > -1: - self.genotype.chromosome = [self.genotype[self.selectedChr]] - - if self.bootChecked: - if controlGeno: - self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, - control = controlGeno, nboot=fd.nboot) - elif weightedRegression: - self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, - variance = _vars, nboot=fd.nboot) - else: - self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, - nboot=fd.nboot) - else: - self.bootResult = [] - - def calculatePValue (self, query_LRS, permutation_LRS_array): - query_index = len(permutation_LRS_array) - for i, one_permutation_LRS in enumerate(permutation_LRS_array): - if one_permutation_LRS >= query_LRS: - query_index = i - break - try: - P_value = float(len(permutation_LRS_array) - query_index) / len(permutation_LRS_array) - except: - P_value = '' - return P_value - - def helpButton(self, anchor): - return HT.Href(self.HELP_PAGE_REF + '#%s' % anchor, self.qmarkImg, target=self.HELP_WINDOW_NAME) - - - def traitRemapTD(self, cursor, fd): - chrList = HT.Select(name="chromosomes", data=self.ChrList, selected=[self.selectedChr], - onChange="chrLength(this.form.chromosomes.value, this.form.scale.value, this.form, self.ChrLengthMbList);") - - physicOnly = HT.Span(' *', Class="cr") - - showSNPCheck = HT.Input(type='checkbox', Class='checkbox', name='showSNP', value='ON', checked=self.SNPChecked) - showSNPText = HT.Span('SNP Track ', self.helpButton("snpSeismograph"), Class="fs12 fwn") - - showGenesCheck = HT.Input(type='checkbox', Class='checkbox', name='showGenes', value='ON', checked=self.geneChecked) - showGenesText = HT.Span('Gene Track', Class="fs12 fwn") - - showIntervalAnalystCheck = HT.Input(type='checkbox', Class='checkbox', name='intervalAnalystCheck', value='ON', checked=self.intervalAnalystChecked) - showIntervalAnalystText = HT.Span('Interval Analyst', Class="fs12 fwn") -## BEGIN HaplotypeAnalyst - - showHaplotypeAnalystCheck = HT.Input(type='checkbox', Class='checkbox', name='haplotypeAnalystCheck', value='ON', checked=self.haplotypeAnalystChecked) - showHaplotypeAnalystText = HT.Span('Haplotype Analyst', Class="fs12 fwn") -## END HaplotypeAnalyst - - leftBox = HT.Input(type="text", name="startMb", size=10) - rightBox = HT.Input(type="text", name="endMb", size=10) - if self.selectedChr > -1 and self.plotScale=='physic': - leftBox.value = self.startMb - rightBox.value = self.endMb - - scaleBox = HT.Select(name="scale", onChange="chrLength(this.form.chromosomes.value, this.form.scale.value, this.form, self.ChrLengthMbList);") - scaleBox.append(("Genetic", "morgan")) - if fd.genotype.Mbmap: - scaleBox.append(("Physical", "physic")) - scaleBox.selected.append(self.plotScale) - - permBox = HT.Input(type="checkbox", name="permCheck", value='ON', checked=self.permChecked, Class="checkbox") - permText = HT.Span("Permutation Test ", self.helpButton("Permutation"), Class="fs12 fwn") - bootBox = HT.Input(type="checkbox", name="bootCheck", value='ON', checked=self.bootChecked, Class="checkbox") - bootText = HT.Span("Bootstrap Test ", self.helpButton("bootstrap"), Class="fs12 fwn") - additiveBox = HT.Input(type="checkbox", name="additiveCheck", value='ON', checked=self.additiveChecked, Class="checkbox") - additiveText = HT.Span("Allele Effects ", self.helpButton("additive"), Class="fs12 fwn") - dominanceBox = HT.Input(type="checkbox", name="dominanceCheck", value='ON', checked=self.dominanceChecked, Class="checkbox") - dominanceText = HT.Span("Dominance Effects ", self.helpButton("Dominance"), Class="fs12 fwn") - - lrsRadio = HT.Input(type="radio", name="LRSCheck", value='LRS', checked = (self.LRS_LOD == "LRS")) - lodRadio = HT.Input(type="radio", name="LRSCheck", value='LOD', checked = (self.LRS_LOD != "LRS")) - lrsMaxBox = HT.Input(type="text", name="lrsMax", value=self.lrsMax, size=3) - widthBox = HT.Input(type="text", name="graphWidth", size=5, value=str(self.graphWidth)) - legendBox = HT.Input(type="checkbox", name="viewLegend", value='ON', checked=self.legendChecked, Class="checkbox") - legendText = HT.Span("Legend", Class="fs12 fwn") - - draw2XBox = HT.Input(type="checkbox", name="draw2X", value='ON', Class="checkbox") - draw2XText = HT.Span("2X Plot", Class="fs12 fwn") - - regraphButton = HT.Input(type="button", Class="button", onClick="javascript:databaseFunc(this.form,'showIntMap');", value="Remap") - controlsForm = HT.Form(cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype="multipart/form-data", name="changeViewForm", submit=HT.Input(type='hidden')) - controlsTable = HT.TableLite(border=0) - innerControlsTable = HT.TableLite(border=0) - if self.selectedChr == -1: - minimumGraphWidth = self.MULT_GRAPH_MIN_WIDTH - else: - minimumGraphWidth = self.GRAPH_MIN_WIDTH - innerControlsTable.append( - HT.TR(HT.TD("Chr: ", Class="fs12 fwb ffl"),HT.TD(chrList, scaleBox, regraphButton)), - HT.TR(HT.TD("View: ", Class="fs12 fwb ffl"),HT.TD(leftBox, " to ", rightBox, "Mb", physicOnly, NOWRAP="on")), - HT.TR(HT.TD("Units: ", Class="fs12 fwb ffl"), HT.TD(lrsRadio, "LRS ", lodRadio, "LOD ", self.helpButton("LOD"))), - HT.TR(HT.TD(" ", Class="fs12 fwb ffl"), HT.TD(lrsMaxBox, "units on Y-axis (0 for default)", Class="fs11 fwn")), - HT.TR(HT.TD("Width: ", Class="fs12 fwb ffl"), HT.TD(widthBox, "pixels (minimum=%d)" % minimumGraphWidth, Class="fs11 fwn ")) - ) - #whether SNP - # comment this, because this will make caculation very slow. - #cursor.execute("Select Species.Id from SnpAll, Species where SnpAll.SpeciesId = Species.Id and Species.Name = %s limit 1", self.species) - #SNPorNot = cursor.fetchall() - SNPorNot = True - #Whether Gene - cursor.execute("Select Species.Id from GeneList, Species where GeneList.SpeciesId = Species.Id and Species.Name = %s limit 1", self.species) - GeneorNot = cursor.fetchall() - - if self.multipleInterval: - optionPanel = HT.TD(valign="top", NOWRAP="on") - else: - optionPanel = HT.TD(permBox, permText, HT.BR(), bootBox, bootText, HT.BR(), additiveBox, additiveText, HT.BR(), valign="top", NOWRAP="on") - #whether dominance - if self.genotype.type == 'intercross': - optionPanel.append(dominanceBox, dominanceText, HT.BR()) - if SNPorNot: - optionPanel.append(showSNPCheck, showSNPText, physicOnly, HT.BR()) - if GeneorNot: - optionPanel.append(showGenesCheck, showGenesText, physicOnly, HT.BR(), - showIntervalAnalystCheck, showIntervalAnalystText, physicOnly, HT.BR()) -## BEGIN HaplotypeAnalyst - optionPanel.append(showHaplotypeAnalystCheck, showHaplotypeAnalystText, physicOnly, HT.BR()) -## END HaplotypeAnalyst - optionPanel.append(legendBox, legendText, HT.BR(),draw2XBox, draw2XText) - controlsTable.append( - HT.TR(HT.TD(innerControlsTable, valign="top"), - HT.TD(" ", width=15), optionPanel), - HT.TR(HT.TD(physicOnly, " only apply to single chromosome physical mapping", align="Center", colspan=3, Class="fs11 fwn")) - ) - controlsForm.append(controlsTable) - - controlsForm.append(HT.Input(name="permSuggestive", value=self.suggestive, type="hidden")) - controlsForm.append(HT.Input(name="permSignificance", value=self.significant, type="hidden")) - -## BEGIN HaplotypeAnalyst #### haplotypeAnalystCheck added below -## END HaplotypeAnalyst - - for key in fd.formdata.keys(): - if key == "searchResult" and type([]) == type(fd.formdata.getvalue(key)): - controlsForm.append(HT.Input(name=key, value=string.join(fd.formdata.getvalue(key), "\t"), type="hidden")) - elif key not in ("endMb", "startMb", "chromosomes", "scale", "permCheck", "bootCheck", "additiveCheck", "dominanceCheck", - "LRSCheck", "intervalAnalystCheck", "haplotypeAnalystCheck", "lrsMax", "graphWidth", "viewLegend", 'showGenes', 'showSNP', 'draw2X', - 'permSuggestive', "permSignificance"): - controlsForm.append(HT.Input(name=key, value=fd.formdata.getvalue(key), type="hidden")) - else: - pass - - # updated by NL, move function changeView(i) to webqtl.js and change it to function changeView(i, Chr_Mb_list) - # move function chrLength(a, b, c) to webqtl.js and change it to function chrLength(a, b, c, Chr_Mb_list) - self.dict['js1'] = '<script src="/javascript/sorttable.js"></script>' - return HT.TD(controlsForm, Class="doubleBorder", width=400) - - def traitInfoTD(self, fd): - if self.selectedChr == -1: - intMapHeading = HT.Paragraph('Map Viewer: Whole Genome', Class="title") - else: - intMapHeading = HT.Paragraph('Map Viewer: Chr %s' % self.genotype[0].name, Class="title") - - heading2 = HT.Paragraph(HT.Strong('Population: '), "%s %s" % (self.species.title(), fd.RISet) , HT.BR()) - #Trait is from an database - if self.traitList and self.traitList[0] and self.traitList[0].db: - #single trait - if len(self.traitList) == 1: - thisTrait = self.traitList[0] - trait_url = HT.Href(text=thisTrait.name, url = os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE) + \ - "?FormID=showDatabase&incparentsf1=1&database=%s&ProbeSetID=%s" % (thisTrait.db.name, thisTrait.name), \ - target='_blank', Class="normalsize") - heading2.append(HT.Strong("Database: "), HT.Href(text=thisTrait.db.fullname, url = webqtlConfig.INFOPAGEHREF % thisTrait.db.name ,\ - target='_blank',Class="normalsize"),HT.BR()) - if thisTrait.db.type == 'ProbeSet': - heading2.append(HT.Strong('Trait ID: '), trait_url, HT.BR(), - HT.Strong("Gene Symbol: "), HT.Italic('%s' % thisTrait.symbol,id="green"),HT.BR()) - if thisTrait.chr and thisTrait.mb: - heading2.append(HT.Strong("Location: "), 'Chr %s @ %s Mb' % (thisTrait.chr, thisTrait.mb)) - elif thisTrait.db.type == 'Geno': - heading2.append(HT.Strong('Locus : '), trait_url, HT.BR()) - if thisTrait.chr and thisTrait.mb: - heading2.append(HT.Strong("Location: "), 'Chr %s @ %s Mb' % (thisTrait.chr, thisTrait.mb)) - elif thisTrait.db.type == 'Publish': - heading2.append(HT.Strong('Record ID: '), trait_url, HT.BR()) - else: - pass - else: - heading2.append(HT.Strong("Traits: "), "Multiple Traits") - else: - heading2.append(HT.Strong("Trait Name: "), fd.identification) - return HT.TD(intMapHeading, heading2, valign="top") - def drawPermutationHistogram(self): ######################################### # Permutation Graph @@ -2660,78 +1990,18 @@ class MarkerRegression(object): return filename - # img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test') - - # self.suggestive = self.perm_output[int(self.nperm*0.37-1)] - # self.significant = self.perm_output[int(self.nperm*0.95-1)] - # self.highlysignificant = self.perm_output[int(self.nperm*0.99-1)] - - # permutationHeading = HT.Paragraph('Histogram of Permutation Test') - # permutationHeading.__setattr__("class","title") - - # permutation = HT.TableLite() - # permutation.append(HT.TR(HT.TD(img)), - # HT.TR(HT.TD('')), - # HT.TR(HT.TD('Total of %d permutations'%self.nperm))) - - # return permutation - - def permutationTextFile(self): - filename= webqtlUtil.genRandStr("Reg_") - fpText = open('%s.txt' % (webqtlConfig.TMPDIR+filename), 'wb') - fpText.write('Suggestive LRS (p = 0.63) = %3.2f\n'%self.suggestive) - fpText.write('Significant LRS (p = 0.05) = %3.2f\n'%self.significant) - fpText.write('Highly Significant LRS (p = 0.01) = %3.2f\n\n'%self.highlysignificant) - fpText.write('%s Permutations\n\n' % str(len(self.perm_output))) - LRSInfo =HT.Paragraph(' Suggestive LRS = %3.2f\n'%self.suggestive, - HT.BR(), - ' Significant LRS =%3.2f\n'%self.significant, - HT.BR(), - ' Highly Significant LRS =%3.2f\n' % self.highlysignificant) - - for lrs_value in self.perm_output: - fpText.write(str(lrs_value) + "\n") - - textUrl = HT.Href(text = 'Download Permutation Results', url= '/tmp/'+filename+'.txt', target = "_blank", Class='fs12 fwn') - - return textUrl - def geneTable(self, geneCol, refGene=None): - #SNPLink = 0 #Not sure what this is used for - if self.dataset.group.species == 'mouse' or self.dataset.group.species == 'rat': - #gene_tblobj = {} - self.gene_table_header = self.getGeneTableHeader(refGene=None) + self.gene_table_header = self.getGeneTableHeaderList(refGene=None) self.gene_table_body = self.getGeneTableBody(geneCol, refGene=None) - #gene_tblobj["header"] = self.getGeneTableHeader(refGene=None) - #gene_tblobj["body"] = self.getGeneTableBody(geneCol, refGene=None) - - #sortby = self.getSortByValue() - - #filename= webqtlUtil.genRandStr("Mapping_") - - #objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - #cPickle.dump(gene_tblobj, objfile) - #objfile.close() - - #gene_table = webqtlUtil.genTableObj(tblobj=gene_tblobj, file=filename, sortby=sortby, tableID="sortable", addIndex="0") else: self.gene_table_header = None self.gene_table_body = None - #gene_table = "" - - #return gene_table - def getGeneTableHeader(self, refGene=None): - - gene_tblobj_header = [] + def getGeneTableHeaderList(self, refGene=None): gene_table_header_list = [] - - col_class = "fs14 fwb ffl b1 cw cbrb" - if self.dataset.group.species == "mouse": - if refGene: gene_table_header_list = ["Index", "Symbol", @@ -2744,25 +2014,6 @@ class MarkerRegression(object): "Mb Start (hg19)", "Literature Correlation", "Gene Description"] - #"PolymiRTS Database" + HT.Href(url='http://compbio.uthsc.edu/miRSNP/', text='>>', target="_blank").__str__(), - #"Gene Weaver Info Content" + HT.Href(url='http://geneweaver.org/', text='>>', target="_blank").__str__()] - - # gene_tblobj_header = [[THCell(HT.TD('Index', HT.BR(), HT.BR(), align='left', width=50, Class=col_class), text="index", idx=0), - # THCell(HT.TD('Symbol', HT.BR(), HT.BR(), align='left', width=100, Class=col_class), text="symbol", idx=1), - # THCell(HT.TD('Mb Start',HT.BR(),'(mm9)', align='left', width=100, Class=col_class), text="mb_start_mm9", idx=2), - # THCell(HT.TD('Length (Kb)', HT.BR(), HT.BR(), align='left', width=100, Class=col_class), text="length", idx=3), - # THCell(HT.TD('SNP',HT.BR(),'Count', align='left', width=47, Class=col_class), text="snp_count", idx=4), - # THCell(HT.TD('SNP',HT.BR(),'Density', align='left', width=78, Class=col_class), text="snp_density", idx=5), - # THCell(HT.TD('Avg',HT.BR(),'Expr', HT.BR(), HT.BR(), align='left', width=44, Class=col_class), sort=0, idx=6), - # THCell(HT.TD('Human',HT.BR(),'Chr', align='left', width=60, Class=col_class), text="human_chr", idx=7), - # THCell(HT.TD('Mb Start',HT.BR(),'(hg19)', align='left', width=100, Class=col_class), text="mb_start_hg19", idx=8), - # THCell(HT.TD('Literature',HT.BR(),'Correlation', align='left', width=100, Class=col_class), text="lit_corr", idx=9), - # THCell(HT.TD('Gene Description', HT.BR(), HT.BR(), align='left', width=290, Class=col_class), text="description", idx=10), - # THCell(HT.TD('PolymiRTS',HT.BR(),'Database', HT.BR(), HT.Href(url='http://compbio.uthsc.edu/miRSNP/', text='>>', target="_blank", Class="normalsize"), - # align='left', width=100, Class=col_class), sort=0, idx=11), - # THCell(HT.TD('Gene Weaver', HT.BR(), 'Info Content', HT.BR(), HT.Href(url='http://geneweaver.org/', text='>>', target="_blank", Class="normalsize"), - # align='left', width=110, Class=col_class), sort=0, idx=12), - # ]] else: gene_table_header_list = ["", "Index", @@ -2775,27 +2026,7 @@ class MarkerRegression(object): "Human Chr", "Mb Start (hg19)", "Gene Description"] - #"PolymiRTS Database" + HT.Href(url='http://compbio.uthsc.edu/miRSNP/', text='>>', target="_blank").__str__(), - #"Gene Weaver Info Content" + HT.Href(url='http://geneweaver.org/', text='>>', target="_blank").__str__()] - - # gene_tblobj_header = [[THCell(HT.TD('Index', HT.BR(), HT.BR(), align='left', width=50, Class=col_class), text="index", idx=0), - # THCell(HT.TD('Symbol', HT.BR(), HT.BR(), align='left', width=100, Class=col_class), text="symbol", idx=1), - # THCell(HT.TD('Mb Start',HT.BR(),'(mm9)', align='left', width=100, Class=col_class), text="mb_start_mm9", idx=2), - # THCell(HT.TD('Length (Kb)', HT.BR(), HT.BR(), align='left', width=100, Class=col_class), text="length", idx=3), - # THCell(HT.TD('SNP',HT.BR(),'Count', align='left', width=47, Class=col_class), text="snp_count", idx=4), - # THCell(HT.TD('SNP',HT.BR(),'Density', align='left', width=78, Class=col_class), text="snp_density", idx=5), - # THCell(HT.TD('Avg',HT.BR(),'Expr', HT.BR(), HT.BR(), align='left', width=44, Class=col_class), sort=0, idx=6), - # THCell(HT.TD('Human',HT.BR(),'Chr', align='left', width=60, Class=col_class), text="human_chr", idx=7), - # THCell(HT.TD('Mb Start',HT.BR(),'(hg19)', align='left', width=100, Class=col_class), text="mb_start_hg19", idx=8), - # THCell(HT.TD('Gene Description', HT.BR(), HT.BR(), align='left', width=290, Class=col_class), text="description", idx=9), - # THCell(HT.TD('PolymiRTS',HT.BR(),'Database', HT.BR(), HT.Href(url='http://compbio.uthsc.edu/miRSNP/', text='>>', target="_blank", Class="normalsize"), - # align='left', width=100, Class=col_class), sort=0, idx=10), - # THCell(HT.TD('Gene Weaver', HT.BR(), 'Info Content', HT.BR(), HT.Href(url='http://geneweaver.org/', text='>>', target="_blank", Class="normalsize"), - # align='left', width=110, Class=col_class), sort=0, idx=11), - # ]] - elif self.dataset.group.species == "rat": - gene_table_header_list = ["", "Index", "Symbol", @@ -2808,52 +2039,13 @@ class MarkerRegression(object): "Mb Start (hg19)", "Gene Description"] - # gene_tblobj_header = [[THCell(HT.TD('Index', HT.BR(), HT.BR(), align='left', width=50, Class=col_class), text="index", idx=0), - # THCell(HT.TD('Symbol', HT.BR(), HT.BR(), align='left', width=100, Class=col_class), text="symbol", idx=1), - # THCell(HT.TD('Mb Start',HT.BR(),'(rn3)', align='left', width=100, Class=col_class), text="mb_start_rn3", idx=2), - # THCell(HT.TD('Length (Kb)', HT.BR(), HT.BR(), align='left', width=100, Class=col_class), text="length", idx=3), - # THCell(HT.TD('Avg',HT.BR(),'Expr', HT.BR(), HT.BR(), align='left', width=44, Class=col_class), sort=0, idx=4), - # THCell(HT.TD('Mouse',HT.BR(),'Chr', align='left', width=60, Class=col_class), text="mouse_chr", idx=5), - # THCell(HT.TD('Mb Start',HT.BR(),'(mm9)', align='left', width=100, Class=col_class), text="mb_start_mm9", idx=6), - # THCell(HT.TD('Human',HT.BR(),'Chr', align='left', width=60, Class=col_class), text="human_chr", idx=7), - # THCell(HT.TD('Mb Start',HT.BR(),'(hg19)', align='left', width=100, Class=col_class), text="mb_start_hg19", idx=8), - # THCell(HT.TD('Gene Description', HT.BR(), HT.BR(), align='left', Class=col_class), text="description", idx=9)]] - - else: - pass - return gene_table_header_list - #return gene_tblobj_header def getGeneTableBody(self, geneCol, refGene=None): - - tblobj_body = [] #contains table rows - className = "fs13 b1 c222" - gene_table_body = [] tableIterationsCnt = 0 if self.dataset.group.species == "mouse": - - # polymiRTS - # http://lily.uthsc.edu:8080/20090422_UTHSC_cuiyan/PolymiRTS_CLS?chrom=2&chrom_from=115&chrom_to=125 - #XZ: We can NOT assume their web service is always on. We must put this block of code in try except. - try: - conn = httplib.HTTPConnection("lily.uthsc.edu:8080") - conn.request("GET", "/20090422_UTHSC_cuiyan/PolymiRTS_CLS?chrom=%s&chrom_from=%s&chrom_to=%s" % (self.genotype[0].name, self.startMb, self.endMb)) - response = conn.getresponse() - data = response.read() - data = data.split() - conn.close() - dic = {} - index = 0 - for i in data: - if index%3==0: - dic[data[index]] = HT.Href(url=data[index+2], text=data[index+1], target="_blank", Class="normalsize") - index = index+1 - except Exception: - dic={} - for gIndex, theGO in enumerate(geneCol): tableIterationsCnt = tableIterationsCnt + 1 @@ -2867,7 +2059,6 @@ class MarkerRegression(object): txEnd = theGO["TxEnd"] theGO["snpDensity"] = theGO["snpCount"]/geneLength if self.ALEX_DEBUG_BOOL_PRINT_GENE_LIST: - #accessionString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=Display&DB=gene&term=%s' % theGO["NM_ID"] geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO["GeneID"] allProbeString = '%s?cmd=sch&gene=%s&alias=1' % (os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), theGO["GeneSymbol"]) @@ -2880,10 +2071,6 @@ class MarkerRegression(object): mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str(int(theGO["TxEnd"]*1000000.0)) +"&pix=620&Submit=submit" - #Used for sorting - mouseStartValue = int(theGO["TxStart"]) - - #the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: if theGO['humanGene']["TxStart"] == '': @@ -2892,20 +2079,11 @@ class MarkerRegression(object): humanStartDisplay = "%0.6f" % theGO['humanGene']["TxStart"] humanChr = theGO['humanGene']["Chromosome"] - if humanChr.find('q'): - humanChrSort = humanChr[:humanChr.find("q")].join(humanChr[(humanChr.find("q")+1):]) #value used when sorting table - elif humanChr.find('p'): - humanChrSort = humanChr[:humanChr.find("p")].join(humanChr[(humanChr.find("p")+1):]) #value used when sorting table - else: - humanChrSort = humanChr humanTxStart = theGO['humanGene']["TxStart"] - #Used for sorting - humanStartValue = float(theGO['humanGene']["TxStart"]) - humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % (humanChr, int(1000000*theGO['humanGene']["TxStart"]), int(1000000*theGO['humanGene']["TxEnd"])) else: - humanStartString = humanChr = humanChrSort = humanStartDisplay = humanStartValue = "--" + humanStartString = humanChr = humanStartDisplay = "--" geneDescription = theGO["GeneDescription"] if len(geneDescription) > 26: @@ -2923,22 +2101,12 @@ class MarkerRegression(object): else: avgExpr = "%0.6f" % avgExpr - # polymiRTS - polymiRTS = ' ' - if dic.has_key(theGO["GeneID"]): - polymiRTS = dic[theGO["GeneID"]] - # If we have a referenceGene then we will show the Literature Correlation if theGO["Chromosome"] == "X": chr_as_int = 19 else: chr_as_int = int(theGO["Chromosome"]) - 1 if refGene: - try: - literatureCorrelation = self.getLiteratureCorrelation(self.cursor,refGene,theGO['GeneID']) - except: - literatureCorrelation = "N/A" - literatureCorrelationString = str(self.getLiteratureCorrelation(self.cursor,refGene,theGO['GeneID']) or "N/A") this_row = [selectCheck.__str__(), @@ -2953,25 +2121,7 @@ class MarkerRegression(object): HT.Href(humanStartString, humanStartDisplay, target="_blank").__str__(), literatureCorrelationString, geneDescription] - #polymiRTS, - - - # this_row.append(TDCell(HT.TD(tableIterationsCnt, selectCheck, width=30, align='right', Class=className), tableIterationsCnt, tableIterationsCnt)) - # this_row.append(TDCell(HT.TD(HT.Href(geneIdString, theGO["GeneSymbol"], target="_blank"), " ", probeSetSearch, align='right', Class=className), theGO["GeneSymbol"], theGO["GeneSymbol"])) - # this_row.append(TDCell(HT.TD(HT.Href(mouseStartString, "%0.6f" % txStart, target="_blank"), align='right', Class=className), str(mouseStartValue), mouseStartValue)) - # this_row.append(TDCell(HT.TD(HT.Href("javascript:centerIntervalMapOnRange2('%s', " % theGO["Chromosome"]+str(txStart-tenPercentLength) + ", " + str(txEnd+tenPercentLength) + ", document.changeViewForm)", "%0.3f" % geneLength), align='right', Class=className), "%0.3f" % geneLength, geneLength)) - # this_row.append(TDCell(HT.TD(snpString, align='right', Class=className), str(theGO["snpCount"]), theGO["snpCount"])) - # this_row.append(TDCell(HT.TD(snpDensityStr, align='right', Class=className), snpDensityStr, theGO["snpDensity"])) - # this_row.append(TDCell(HT.TD(avgExpr, align='right', Class=className), "--", "--")) - # this_row.append(TDCell(HT.TD(humanChr, align='right', Class=className), humanChr, humanChrSort)) - # this_row.append(TDCell(HT.TD(HT.Href(humanStartString, humanStartDisplay, target="_blank"), align='right', Class=className), humanStartDisplay, humanStartValue)) - # this_row.append(TDCell(HT.TD(literatureCorrelationString, align='right', Class=className), literatureCorrelationString, literatureCorrelation)) - # this_row.append(TDCell(HT.TD(geneDescription, align='right', Class=className), geneDescription, geneDescription)) - # this_row.append(TDCell(HT.TD(polymiRTS, align='right', Class=className), "", "")) - # this_row.append(TDCell(HT.TD("", align='right', Class=className), "", "")) - else: - this_row = [selectCheck.__str__(), str(tableIterationsCnt), HT.Href(geneIdString, theGO["GeneSymbol"], target="_blank").__str__() + " " + probeSetSearch.__str__(), @@ -2983,29 +2133,11 @@ class MarkerRegression(object): humanChr, HT.Href(humanStartString, humanStartDisplay, target="_blank").__str__(), geneDescription] - #polymiRTS, - - - # this_row.append(TDCell(HT.TD(tableIterationsCnt, selectCheck, width=30, align='right', Class=className), tableIterationsCnt, tableIterationsCnt)) - # this_row.append(TDCell(HT.TD(HT.Href(geneIdString, theGO["GeneSymbol"], target="_blank"), " ", probeSetSearch, align='right', Class=className), theGO["GeneSymbol"], theGO["GeneSymbol"])) - # this_row.append(TDCell(HT.TD(HT.Href(mouseStartString, "%0.6f" % txStart, target="_blank"), align='right', Class=className), str(mouseStartValue), mouseStartValue)) - # this_row.append(TDCell(HT.TD(HT.Href("javascript:centerIntervalMapOnRange2('%s', " % theGO["Chromosome"]+str(txStart-tenPercentLength) + ", " + str(txEnd+tenPercentLength) + ", document.changeViewForm)", "%0.3f" % geneLength), align='right', Class=className), "%0.3f" % geneLength, geneLength)) - # this_row.append(TDCell(HT.TD(snpString, align='right', Class=className), str(theGO["snpCount"]), theGO["snpCount"])) - # this_row.append(TDCell(HT.TD(snpDensityStr, align='right', Class=className), snpDensityStr, theGO["snpDensity"])) - # this_row.append(TDCell(HT.TD(avgExpr, align='right', Class=className), "--", "--")) - # this_row.append(TDCell(HT.TD(humanChr, align='right', Class=className), humanChr, humanChrSort)) - # this_row.append(TDCell(HT.TD(HT.Href(humanStartString, humanStartDisplay, target="_blank"), align='right', Class=className), humanStartDisplay, humanStartValue)) - # this_row.append(TDCell(HT.TD(geneDescription, align='right', Class=className), geneDescription, geneDescription)) - # this_row.append(TDCell(HT.TD(polymiRTS, align='right', Class=className), "", "")) - # this_row.append(TDCell(HT.TD("", align='right', Class=className), "", "")) gene_table_body.append(this_row) - #tblobj_body.append(this_row) elif self.dataset.group.species == 'rat': - for gIndex, theGO in enumerate(geneCol): - this_row = [] #container for the cells of each row selectCheck = HT.Input(type="checkbox", name="searchResult", Class="checkbox", onClick="highlight(this)").__str__() #checkbox for each row @@ -3022,7 +2154,6 @@ class MarkerRegression(object): chr_as_int = int(theGO["Chromosome"]) - 1 geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) - #geneLengthURL = "javascript:centerIntervalMapOnRange2('%s', %f, %f, document.changeViewForm)" % (theGO["Chromosome"], float(theGO["TxStart"])-(geneLength*0.1), float(theGO["TxEnd"])+(geneLength*0.1)) geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float(theGO["TxStart"])-(geneLength*0.1), float(theGO["TxEnd"])+(geneLength*0.1)) avgExprVal = [] @@ -3041,15 +2172,9 @@ class MarkerRegression(object): #the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: humanChr = theGO['humanGene']["Chromosome"] - if 'q' in humanChr: - humanChrSort = humanChr[:humanChr.find("q")].join(humanChr[(humanChr.find("q")+1):]) #value used when sorting table - elif 'p' in humanChr: - humanChrSort = humanChr[:humanChr.find("p")].join(humanChr[(humanChr.find("p")+1):]) #value used when sorting table - else: - humanChrSort = humanChr humanTxStart = theGO['humanGene']["TxStart"] else: - humanChr = humanTxStart = humanChrSort = "" + humanChr = humanTxStart = "" geneDesc = theGO["GeneDescription"] if geneDesc == "---": @@ -3067,26 +2192,9 @@ class MarkerRegression(object): humanTxStart, geneDesc] - - #this_row.append(TDCell(HT.TD(gIndex + 1, selectCheck, align='left', Class=className), str(gIndex+1), gIndex+1)) - #this_row.append(TDCell(HT.TD(webqtlSearch, geneSymbolNCBI, align='left', Class=className), theGO["GeneSymbol"], theGO["GeneSymbol"])) - #this_row.append(TDCell(HT.TD(theGO["TxStart"], align='left', Class=className), theGO["TxStart"], theGO["TxStart"])) - #this_row.append(TDCell(HT.TD(HT.Href(geneLengthURL, "%0.3f" % (geneLength*1000.0)), align='left', Class=className), "%0.3f" % (geneLength*1000.0), (geneLength*1000.0))) - #this_row.append(TDCell(HT.TD(avgExprVal, align='left', Class=className), "", "")) - #this_row.append(TDCell(HT.TD(mouseChr, align='left', Class=className), mouseChr, mouseChr)) - #this_row.append(TDCell(HT.TD(mouseTxStart, align='left', Class=className), mouseTxStart, mouseTxStart)) - #this_row.append(TDCell(HT.TD(humanChr, align='left', Class=className), humanChr, humanChrSort)) - #this_row.append(TDCell(HT.TD(humanTxStart, align='left', Class=className), humanTxStart, humanTxStart)) - #this_row.append(TDCell(HT.TD(geneDesc, align='left', Class=className), geneDesc, geneDesc)) - gene_table_body.append(this_row) - #tblobj_body.append(this_row) - - else: - pass return gene_table_body - #return tblobj_body def getLiteratureCorrelation(cursor,geneId1=None,geneId2=None): if not geneId1 or not geneId2: @@ -3105,10 +2213,4 @@ class MarkerRegression(object): lCorr = lCorr[0] break except: raise #lCorr = None - return lCorr - - def getSortByValue(self): - - sortby = ("", "") - - return sortby + return lCorr
\ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py index 4de88f00..2f327faf 100644 --- a/wqflask/wqflask/marker_regression/plink_mapping.py +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -10,9 +10,7 @@ logger = utility.logger.getLogger(__name__ ) def run_plink(this_trait, dataset, species, vals, maf): plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name)) - gen_pheno_txt_file(dataset, vals) - #gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = plink_output_filename) plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % ( flat_files('mapping'), dataset.group.name, maf, TMPDIR, plink_output_filename) @@ -22,12 +20,6 @@ def run_plink(this_trait, dataset, species, vals, maf): count, p_values = parse_plink_output(plink_output_filename, species) - #for marker in self.dataset.group.markers.markers: - # if marker['name'] not in included_markers: - # logger.debug("marker:", marker) - # self.dataset.group.markers.markers.remove(marker) - # #del self.dataset.group.markers.markers[marker] - logger.debug("p_values:", p_values) dataset.group.markers.add_pvalues(p_values) @@ -108,7 +100,6 @@ def parse_plink_output(output_filename, species): result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb") - header_line = result_fp.readline()# read header line line = result_fp.readline() value_list = [] # initialize value list, this list will include snp, bp and pvalue info @@ -156,11 +147,6 @@ def parse_plink_output(output_filename, species): else: line = result_fp.readline() - #if p_value_list: - # min_p_value = min(p_value_list) - #else: - # min_p_value = 0 - return count, p_value_dict ###################################################### @@ -173,4 +159,4 @@ def build_line_list(line=None): line_list = [item for item in line_list if item <>''] line_list = map(string.strip, line_list) - return line_list + return line_list
\ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index 6b58190f..ffbfb5c5 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -26,7 +26,7 @@ def gen_reaper_results(this_trait, dataset, samples_before, trait_vals, json_dat perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=num_perm) suggestive = perm_output[int(num_perm*0.37-1)] significant = perm_output[int(num_perm*0.95-1)] - highly_significant = perm_output[int(num_perm*0.99-1)] + #highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case json_data['suggestive'] = suggestive json_data['significant'] = significant diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index f3694f0b..41d67012 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -5,15 +5,16 @@ from base.webqtlConfig import TMPDIR from utility import webqtlUtil from utility.tools import locate, TEMPDIR +import utility.logger +logger = utility.logger.getLogger(__name__ ) + def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan): geno_to_rqtl_function(dataset) ## Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_c = ro.r["c"] # Map the c function - r_sum = ro.r["sum"] # Map the sum function plot = ro.r["plot"] # Map the plot function - postscript = ro.r["postscript"] # Map the postscript function png = ro.r["png"] # Map the png function dev_off = ro.r["dev.off"] # Map the device off function @@ -23,17 +24,13 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, scanone = ro.r["scanone"] # Map the scanone function scantwo = ro.r["scantwo"] # Map the scantwo function calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function - read_cross = ro.r["read.cross"] # Map the read.cross function - write_cross = ro.r["write.cross"] # Map the write.cross function GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function crossname = dataset.group.name genofilelocation = locate(crossname + ".geno", "genotype") crossfilelocation = TMPDIR + crossname + ".cross" - #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation) - - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available if manhattan_plot: cross_object = calc_genoprob(cross_object) @@ -42,18 +39,14 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype - # for debug: write_cross(cross_object, "csvr", "test.csvr") - # Scan for QTLs - covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix + covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix if pair_scan: - if do_control == "true": # If sum(covar) > 0 we have a covariate matrix - print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16) + if do_control == "true": + logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16) else: - print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) - - #print("Pair scan results:", result_data_frame) + logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" png(file=TEMPDIR+pair_scan_filename) @@ -63,9 +56,9 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, return process_pair_scan_results(result_data_frame) else: if do_control == "true": - print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method) + logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method) else: - print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) + logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) if do_control == "true": @@ -79,7 +72,6 @@ def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, return process_rqtl_results(result_data_frame) def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly - ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -117,15 +109,13 @@ def add_phenotype(cross, pheno_as_string): def create_covariates(control_marker, cross): ro.globalenv["the_cross"] = cross ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) - #print("Marker names of selected covariates:", covariate_names) ro.r('covnames <- c(' + covariate_names + ')') ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') ro.r('covnames <- covnames[covInGeno]') ro.r("cat('covnames (purged): ', covnames,'\n')") ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file - #print("R/qtl matrix of covariates:", ro.r["covariates"]) return ro.r["covariates"] def sanitize_rqtl_phenotype(vals): @@ -149,7 +139,6 @@ def process_pair_scan_results(result): result = result[1] output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - #print("R/qtl scantwo output:", output) for i, line in enumerate(result.iter_row()): marker = {} @@ -175,9 +164,7 @@ def process_rqtl_perm_results(num_perm, results): def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop qtl_results = [] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - #print("R/qtl scanone output:", output) for i, line in enumerate(result.iter_row()): marker = {} @@ -187,5 +174,4 @@ def process_rqtl_results(result): # TODO: how to make this a one liner an marker['lod_score'] = output[i][2] qtl_results.append(marker) - return qtl_results - + return qtl_results
\ No newline at end of file diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index 5321e420..38117a8e 100644 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -6,31 +6,16 @@ import datetime import simplejson as json from flask import request -from flask.ext.sqlalchemy import SQLAlchemy from wqflask import app import sqlalchemy - -from sqlalchemy import (Column, Integer, String, Table, ForeignKey, Unicode, Boolean, DateTime, +from sqlalchemy import (Column, ForeignKey, Unicode, Boolean, DateTime, Text, Index) -from sqlalchemy.orm import relationship, backref +from sqlalchemy.orm import relationship from wqflask.database import Base, init_db - - -# Define models -#roles_users = Table('roles_users', -# Column('user_id', Integer(), ForeignKey('user.the_id')), -# Column('role_id', Integer(), ForeignKey('role.the_id'))) - -#class Role(Base): -# __tablename__ = "role" -# id = Column(Unicode(36), primary_key=True, default=lambda: unicode(uuid.uuid4())) -# name = Column(Unicode(80), unique=True, nullable=False) -# description = Column(Unicode(255)) - class User(Base): __tablename__ = "user" id = Column(Unicode(36), primary_key=True, default=lambda: unicode(uuid.uuid4())) @@ -133,11 +118,6 @@ class User(Base): except IndexError: return None - - #roles = relationship('Role', secondary=roles_users, - # backref=backref('users', lazy='dynamic')) - - class Login(Base): __tablename__ = "login" id = Column(Unicode(36), primary_key=True, default=lambda: unicode(uuid.uuid4())) @@ -177,22 +157,15 @@ class UserCollection(Base): except: return 0 - #@property - #def display_num_members(self): - # return display_collapsible(self.num_members) - def members_as_set(self): return set(json.loads(self.members)) - def display_collapsible(number): if number: return number else: return "" - def user_uuid(): """Unique cookie for a user""" - user_uuid = request.cookies.get('user_uuid') - + user_uuid = request.cookies.get('user_uuid')
\ No newline at end of file diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index b42904a4..63273a29 100644 --- a/wqflask/wqflask/network_graph/network_graph.py +++ b/wqflask/wqflask/network_graph/network_graph.py @@ -21,9 +21,7 @@ from __future__ import absolute_import, print_function, division import sys -# sys.path.append(".") Never do this in a webserver! -import gc import string import cPickle import os @@ -95,7 +93,6 @@ class NetworkGraph(object): self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) - self.network_data = {} self.nodes_list = [] self.edges_list = [] for trait_db in self.trait_list: @@ -107,9 +104,9 @@ class NetworkGraph(object): corr_result_row = [] is_spearman = False #ZS: To determine if it's above or below the diagonal - + max_corr = 0 #ZS: Used to determine whether node should be hidden when correlation coefficient slider is used - + for target in self.trait_list: target_trait = target[0] target_db = target[1] @@ -141,7 +138,7 @@ class NetworkGraph(object): continue else: sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - + if -1 <= sample_r < -0.7: color = "#0000ff" width = 3 @@ -163,10 +160,10 @@ class NetworkGraph(object): else: color = "#000000" width = 0 - + if abs(sample_r) > max_corr: max_corr = abs(sample_r) - + edge_data = {'id' : str(this_trait.name) + '_to_' + str(target_trait.name), 'source' : str(this_trait.name) + ":" + str(this_trait.dataset.name), 'target' : str(target_trait.name) + ":" + str(target_trait.dataset.name), @@ -176,11 +173,11 @@ class NetworkGraph(object): 'overlap' : num_overlap, 'color' : color, 'width' : width } - + edge_dict = { 'data' : edge_data } - + self.edges_list.append(edge_dict) - + if trait_db[1].type == "ProbeSet": node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), 'label' : this_trait.symbol, @@ -197,19 +194,13 @@ class NetworkGraph(object): 'label' : this_trait.name, 'max_corr' : max_corr } } self.nodes_list.append(node_dict) - - #self.network_data['dataSchema'] = {'nodes' : [{'name' : "label" , 'type' : "string"}], - # 'edges' : [{'name' : "label" , 'type' : "string"}] } - - #self.network_data['data'] = {'nodes' : self.nodes_list, - # 'edges' : self.edges_list } self.elements = json.dumps(self.nodes_list + self.edges_list) - + groups = [] for sample in self.all_sample_list: groups.append(1) - + self.js_data = dict(traits = [trait.name for trait in self.traits], groups = groups, cols = range(len(self.traits)), @@ -217,7 +208,6 @@ class NetworkGraph(object): samples = self.all_sample_list, sample_data = self.sample_data, elements = self.elements,) - # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) def get_trait_db_obs(self, trait_db_list): self.trait_list = [] @@ -229,4 +219,4 @@ class NetworkGraph(object): trait_ob = GeneralTrait(dataset=dataset_ob, name=trait_name, cellid=None) - self.trait_list.append((trait_ob, dataset_ob)) + self.trait_list.append((trait_ob, dataset_ob))
\ No newline at end of file diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py index ac3cd366..558372bb 100644 --- a/wqflask/wqflask/show_trait/export_trait_data.py +++ b/wqflask/wqflask/show_trait/export_trait_data.py @@ -1,7 +1,5 @@ from __future__ import print_function, division -import operator - import simplejson as json from pprint import pformat as pf diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 6de5cd5f..8b801396 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -6,7 +6,6 @@ import datetime import cPickle import uuid import json as json -#import pyXLWriter as xl from collections import OrderedDict @@ -24,7 +23,6 @@ from utility import webqtlUtil, Plot, Bunch, helper_functions from base.trait import GeneralTrait from base import data_set from db import webqtlDatabaseFunction -from basicStatistics import BasicStatisticsFunctions from pprint import pformat as pf @@ -74,14 +72,6 @@ class ShowTrait(object): cellid=None) self.trait_vals = Redis.get(self.trait_id).split() - #self.dataset.group.read_genotype_file() - - #if this_trait: - # if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet': - # self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" % - # this_trait.mysqlid) - # heritability = self.cursor.fetchone() - #ZS: Get verify/rna-seq link URLs try: blatsequence = self.this_trait.blatseq @@ -193,7 +183,7 @@ class ShowTrait(object): self.sample_group_types['samples_primary'] = self.dataset.group.name sample_lists = [group.sample_list for group in self.sample_groups] - self.get_mapping_methods() + self.genofiles = get_genofiles(self.dataset) self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups) @@ -212,28 +202,6 @@ class ShowTrait(object): temp_uuid = self.temp_uuid) self.js_data = js_data - def get_mapping_methods(self): - '''Only display mapping methods when the dataset group's genotype file exists''' - def check_plink_gemma(): - if flat_file_exists("mapping"): - MAPPING_PATH = flat_files("mapping")+"/" - if (os.path.isfile(MAPPING_PATH+self.dataset.group.name+".bed") and - (os.path.isfile(MAPPING_PATH+self.dataset.group.name+".map") or - os.path.isfile(MAPPING_PATH+self.dataset.group.name+".bim"))): - return True - return False - - def check_pylmm_rqtl(): - if os.path.isfile(webqtlConfig.GENODIR+self.dataset.group.name+".geno") and (os.path.getsize(webqtlConfig.JSON_GENODIR+self.dataset.group.name+".json") > 0): - return True - else: - return False - - self.genofiles = get_genofiles(self.dataset) - self.use_plink_gemma = check_plink_gemma() - self.use_pylmm_rqtl = check_pylmm_rqtl() - - def build_correlation_tools(self): if self.temp_trait == True: this_group = self.temp_group @@ -246,7 +214,6 @@ class ShowTrait(object): this_group = 'BXD' if this_group: - #dataset_menu = self.dataset.group.datasets() if self.temp_trait == True: dataset_menu = data_set.datasets(this_group) else: @@ -264,7 +231,6 @@ class ShowTrait(object): return_results_menu = return_results_menu, return_results_menu_selected = return_results_menu_selected,) - def make_sample_lists(self): all_samples_ordered = self.dataset.group.all_samples_ordered() @@ -316,10 +282,6 @@ class ShowTrait(object): sample_group_type='primary', header="%s Only" % (self.dataset.group.name)) self.sample_groups = (primary_samples,) - #TODO: Figure out why this if statement is written this way - Zach - #if (other_sample_names or (fd.f1list and this_trait.data.has_key(fd.f1list[0])) - # or (fd.f1list and this_trait.data.has_key(fd.f1list[1]))): - # logger.debug("hjs") self.dataset.group.allsamples = all_samples_ordered def get_nearest_marker(this_trait, this_db): diff --git a/wqflask/wqflask/show_trait/show_trait_page.py b/wqflask/wqflask/show_trait/show_trait_page.py deleted file mode 100644 index de818a64..00000000 --- a/wqflask/wqflask/show_trait/show_trait_page.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from __future__ import division, print_function - -from flask import request - -from htmlgen import HTMLgen2 as HT - -from base import webqtlConfig -from utility import webqtlUtil -from base.webqtlTrait import webqtlTrait -from base.templatePage import templatePage -from DataEditingPage import DataEditingPage - - - -class ShowTraitPage(DataEditingPage): - - def __init__(self, fd, traitInfos = None): - self.fd = fd - - # This sets self.cursor - assert self.openMysql(), "No database" - - # When is traitInfos used? - if traitInfos: - database, ProbeSetID, CellID = traitInfos - else: - print("fd is:", fd) - database = fd['database'] - ProbeSetID = fd['ProbeSetID'] - - CellID = fd.get('CellID') - - - thisTrait = webqtlTrait(db=database, name=ProbeSetID, cellid=CellID, cursor=self.cursor) - - if thisTrait.db.type == "ProbeSet": - - self.cursor.execute('''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers - FROM ProbeSetFreeze WHERE Name = "%s"''' % database) - - indId, indName, indFullName, confidential, AuthorisedUsers = self.cursor.fetchall()[0] - - if confidential == 1: - access_to_confidential_dataset = 0 - - #for the dataset that confidentiality is 1 - #1. 'admin' and 'root' can see all of the dataset - #2. 'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table) - if webqtlConfig.USERDICT[self.privilege] > webqtlConfig.USERDICT['user']: - access_to_confidential_dataset = 1 - else: - AuthorisedUsersList=AuthorisedUsers.split(',') - if AuthorisedUsersList.__contains__(self.userName): - access_to_confidential_dataset = 1 - - if not access_to_confidential_dataset: - #Error, Confidential Database - heading = "Show Database" - detail = ["The %s database you selected is not open to the public \ - at this time, please go back and select other database." % indFullName] - self.error(heading=heading,detail=detail,error="Confidential Database") - return - print("environ:", request.environ) - - # Becuase of proxying remote_addr is probably localhost, so we first try for - # HTTP_X_FORWARDED_FOR - user_ip = request.environ.get('HTTP_X_FORWARDED_FOR') or request.remote_addr # in old app was fd.remote_ip - print("user_ip is:", user_ip) - query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \ - UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400" - self.cursor.execute(query,user_ip) - daycount = self.cursor.fetchall() - if daycount: - daycount = daycount[0][0] - if daycount > webqtlConfig.DAILYMAXIMUM: - heading = "Retrieve Data" - detail = ['For security reasons, the maximum access to a database is \ - %d times per day per ip address. You have reached the limit, please \ - try it again tomorrow.' % webqtlConfig.DAILYMAXIMUM] - self.error(heading=heading,detail=detail) - return - - - if thisTrait.db.type != 'ProbeSet' and thisTrait.cellid: - heading = "Retrieve Data" - detail = ['The Record you requested doesn\'t exist!'] - self.error(heading=heading,detail=detail) - return - - ##identification, etc. - fd.identification = '%s : %s' % (thisTrait.db.shortname,ProbeSetID) - thisTrait.returnURL = webqtlConfig.CGIDIR + webqtlConfig.SCRIPTFILE + '?FormID=showDatabase&database=%s\ - &ProbeSetID=%s&RISet=%s&parentsf1=on' %(database, ProbeSetID, fd['RISet']) - - if CellID: - fd.identification = '%s/%s'%(fd.identification, CellID) - thisTrait.returnURL = '%s&CellID=%s' % (thisTrait.returnURL, CellID) - - thisTrait.retrieveInfo() - thisTrait.retrieveData() - self.updMysql() - self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip) - self.openMysql() - - - ##read genotype file - fd.RISet = thisTrait.riset - fd.readGenotype() - - #if webqtlUtil.ListNotNull(map(lambda x:x.var, thisTrait.data.values())): - if any([x.variance for x in thisTrait.data.values()]): - fd.display_variance = True - fd.formID = 'varianceChoice' - - DataEditingPage.__init__(self, fd, thisTrait) diff --git a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json index b7ebb9ed..d00b52b8 100644 --- a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json +++ b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json @@ -1527,6 +1527,36 @@ } }, "mouse": { + "AIL": { + "Hippocampus mRNA": [ + [ + "844", + "UCSD_AIL_HIP_RNA-Seq_0418", + "UCSD AIL Hippocampus (Apr18) RNA-Seq" + ] + ], + "Phenotypes": [ + [ + "None", + "AILPublish", + "AIL Published Phenotypes" + ] + ], + "Prefrontal Cortex mRNA": [ + [ + "846", + "UCSD_AIL_PFC_RNA-Seq_0418", + "UCSD AIL Prefrontal Cortex (Apr18) RNA-Seq" + ] + ], + "Striatum mRNA": [ + [ + "845", + "UCSD_AIL_STR_RNA-Seq_0418", + "UCSD AIL Striatum (Apr18) RNA-Seq" + ] + ] + }, "AKXD": { "Genotypes": [ [ @@ -2968,7 +2998,7 @@ "BXD-Bone": { "Phenotypes": [ [ - "None", + "650", "BXD-BonePublish", "BXD-Bone Published Phenotypes" ] @@ -3156,6 +3186,13 @@ ] }, "CIE-RMA": { + "Hippocampus mRNA": [ + [ + "831", + "INIA_UTHSC_Hip_AffyMTA1_May17", + "INIA-UTHSC Hippocampus CIE Affy MTA 1.0 GeneLevel (Mar18) RMA" + ] + ], "Midbrain mRNA": [ [ "830", @@ -3826,6 +3863,10 @@ ], "mouse": [ [ + "AIL", + "AIL Advanced Intercross Line" + ], + [ "AKXD", "AKXD" ], @@ -3875,7 +3916,7 @@ ], [ "BXD-Bone", - "BXD Bone" + "BXD Bone Individual Data" ], [ "BXD-Harvested", @@ -4727,6 +4768,24 @@ ] }, "mouse": { + "AIL": [ + [ + "Phenotypes", + "Phenotypes" + ], + [ + "Hippocampus mRNA", + "Hippocampus mRNA" + ], + [ + "Prefrontal Cortex mRNA", + "Prefrontal Cortex mRNA" + ], + [ + "Striatum mRNA", + "Striatum mRNA" + ] + ], "AKXD": [ [ "Genotypes", @@ -5111,6 +5170,10 @@ "Phenotypes" ], [ + "Hippocampus mRNA", + "Hippocampus mRNA" + ], + [ "Midbrain mRNA", "Midbrain mRNA" ] diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index 117d8f12..dfdafaf0 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -789,7 +789,41 @@ type: 'bar' }; root.bar_data = [bar_trace] + + positive_error_vals = [] + negative_error_vals = [] + for (i = 0;i < get_sample_vals(sample_lists[0]).length; i++){ + if (get_sample_errors(sample_lists[0])[i] != undefined) { + positive_error_vals.push(get_sample_vals(sample_lists[0])[i] + get_sample_errors(sample_lists[0])[i]) + negative_error_vals.push(get_sample_vals(sample_lists[0])[i] - get_sample_errors(sample_lists[0])[i]) + } else { + positive_error_vals.push(get_sample_vals(sample_lists[0])[i]) + negative_error_vals.push(get_sample_vals(sample_lists[0])[i]) + } + } + + // Calculate the y axis cutoff to avoid a situation where all bar variation is clustered at the top of the chart + min_y_val = Math.min(...negative_error_vals) + max_y_val = Math.max(...positive_error_vals) + + if (min_y_val == 0) { + range_bottom = 0; + } else { + range_top = max_y_val + Math.abs(max_y_val)*0.1 + range_bottom = min_y_val - Math.abs(min_y_val)*0.1 + if (min_y_val > 0) { + range_bottom = min_y_val - 0.1*Math.abs(min_y_val) + } else if (min_y_val < 0) { + range_bottom = min_y_val + 0.1*min_y_val + } else { + range_bottom = 0 + } + } + var layout = { + yaxis: { + range: [range_bottom, range_top] + }, width: 1200, height: 500, margin: { diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index fb4e19a1..05136ad8 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -94,12 +94,14 @@ <th>Sample p(r)</th> <th>Lit r</th> <th>Tissue r</th> + <th>Tissue p(r)</th> {% else %} <th>Sample rho</th> <th>N</th> <th>Sample p(rho)</th> <th>Lit r</th> <th>Tissue rho</th> + <th>Tissue p(rho)</th> {% endif %} {% elif target_dataset.type == "Publish" %} {% if corr_method == 'pearson' %} @@ -156,8 +158,10 @@ {% endif %} {% if trait.tissue_corr == "" or trait.tissue_corr == 0.000 %} <td align="right">--</td> + <td align="right">--</td> {% else %} <td align="right">{{'%0.3f'|format(trait.tissue_corr)}}</td> + <td align="right">{{'%0.3e'|format(trait.tissue_pvalue)}}</td> {% endif %} {% elif target_dataset.type == "Publish" %} <td>{{ trait.description_display }}</td> @@ -319,7 +323,7 @@ title: 'correlation_results', fieldBoundary: '"', exportOptions: { - columns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + columns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] } } ], @@ -342,7 +346,8 @@ { "type": "natural" }, { "type": "scientific" }, { "type": "natural" }, - { "type": "natural" } + { "type": "natural" }, + { "type": "scientific" } ], "createdRow": function ( row, data, index ) { $('td', row).eq(4).attr('title', $('td', row).eq(4).text()); diff --git a/wqflask/wqflask/templates/heatmap.html b/wqflask/wqflask/templates/heatmap.html index 49d7f962..68880829 100644 --- a/wqflask/wqflask/templates/heatmap.html +++ b/wqflask/wqflask/templates/heatmap.html @@ -39,7 +39,6 @@ <script language="javascript" type="text/javascript" src="/static/new/javascript/lodheatmap.js"></script> <script language="javascript" type="text/javascript" src="/static/new/javascript/lod_chart.js"></script> <script language="javascript" type="text/javascript" src="/static/new/javascript/curvechart.js"></script> -<!-- <script language="javascript" type="text/javascript" src="/static/new/javascript/create_heatmap.js"></script>--> <script language="javascript" type="text/javascript" src="/static/new/javascript/iplotMScanone_noeff.js"></script> <script language="javascript" type="text/javascript" src="/static/packages/underscore/underscore-min.js"></script> diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 5d128b4a..4aad4242 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -16,16 +16,6 @@ {% endblock %} {% block content %} <!-- Start of body --> -<!-- - {% if dataset.type == 'Temp' %} - {{ header("{}".format("Temporary Trait")) }} - {% elif dataset.type != 'Geno' %} - {{ header("{}".format(this_trait.name_header_fmt), - '{}: {}'.format(this_trait.name, this_trait.description_fmt)) }} - {% else %} - {{ header("{}".format(this_trait.name_header_fmt)) }} - {% endif %} ---> <div class="container"> <h2>Trait Data and Analysis for <b>{{ this_trait.name }}</b></h2> {% if this_trait.dataset.type != 'Publish' %} diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index 03590c2c..0ecf1eb9 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -35,7 +35,6 @@ </ul> <div class="tab-content"> - {# if use_pylmm_rqtl and not use_plink_gemma and dataset.group.species != "human" #} {% if dataset.group.mapping_id == "1" %} <div class="tab-pane active" id="gemma"> <div style="padding-top: 20px;" class="form-horizontal"> @@ -70,10 +69,6 @@ </label> </div> </div> - <!-- - </div> - <div style="padding-top: 5px; padding-bottom: 5px; padding-left: 20px;" class="form-horizontal"> - --> <div class="mapping_method_fields form-group"> <label style="text-align: right;" class="col-xs-3 control-label">Covariates</label> <div style="margin-left:20px;" class="col-xs-7"> @@ -99,18 +94,6 @@ </div> </div> </div> -<!-- - <div class="form-group"> - <div class="col-xs-4 controls"> - <label class="col-xs-2 control-label"></label> - <div class="col-xs-4"> - <button id="gemma_bimbam_compute" class="btn submit_special btn-success" data-url="/marker_regression" title="Compute Marker Regression"> - Compute - </button> - </div> - </div> - </div> ---> </div> <div class="tab-pane" id="interval_mapping"> <div style="margin-top: 20px" class="form-horizontal"> diff --git a/wqflask/wqflask/tracer.py b/wqflask/wqflask/tracer.py deleted file mode 100644 index a1043d28..00000000 --- a/wqflask/wqflask/tracer.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import absolute_import, division, print_function - -print("At top of tracer") - -import sys - -#################################################################################### - -# Originally based on http://stackoverflow.com/a/8315566 -def tracefunc(frame, event, arg, indent=[0]): - - func = dict(funcname = frame.f_code.co_name, - filename = frame.f_code.co_filename, - lineno = frame.f_lineno) - - #These are too common to bother printing... - too_common = ( - '/home/sam/ve27/local/lib/python2.7/site-packages/werkzeug/', - '/home/sam/ve27/local/lib/python2.7/site-packages/jinja2/', - ) - - - if func['filename'].startswith(too_common): - return tracefunc - - info = "{funcname} [{filename}: {lineno}]".format(**func) - - if event == "call": - indent[0] += 2 - #print("-" * indent[0] + "> call function", frame.f_code.co_name) - print("-" * indent[0] + "> call function:", info) - elif event == "return": - print("<" + "-" * indent[0], "exit function:", info) - indent[0] -= 2 - return tracefunc - -def turn_on(): - sys.settrace(tracefunc) - print("Tracing turned on!!!!") -#################################################################################### - diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index d652f2e9..15eaf8cc 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -13,7 +13,6 @@ import urlparse import simplejson as json -#from redis import StrictRedis import redis # used for collections Redis = redis.StrictRedis() @@ -42,7 +41,6 @@ from smtplib import SMTP from utility.tools import SMTP_CONNECT, SMTP_USERNAME, SMTP_PASSWORD, LOG_SQL_ALCHEMY THREE_DAYS = 60 * 60 * 24 * 3 -#THREE_DAYS = 45 def timestamp(): return datetime.datetime.utcnow().isoformat() @@ -66,16 +64,6 @@ class AnonUser(object): def set_cookie(response): response.set_cookie(self.cookie_name, self.cookie) - def add_collection(self, new_collection): - collection_dict = dict(name = new_collection.name, - created_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - num_members = new_collection.num_members, - members = new_collection.get_members()) - - Redis.set(self.key, json.dumps(collection_dict)) - Redis.expire(self.key, 60 * 60 * 24 * 5) - def delete_collection(self, collection_name): existing_collections = self.get_collections() updated_collections = [] @@ -171,19 +159,11 @@ class UserSession(object): self.session_id = session_id self.record = Redis.hgetall(self.redis_key) - if not self.record: # This will occur, for example, when the browser has been left open over a long # weekend and the site hasn't been visited by the user self.logged_in = False - ########### Grrr...this won't work because of the way flask handles cookies - # Delete the cookie - #response = make_response(redirect(url_for('login'))) - #response.set_cookie(self.cookie_name, '', expires=0) - #flash( - # "Due to inactivity your session has expired. If you'd like please login again.") - #return response return if Redis.ttl(self.redis_key) < THREE_DAYS: @@ -218,7 +198,6 @@ class UserSession(object): self.db_object = model.User.query.get(self.user_id) return self.db_object - def delete_session(self): # And more importantly delete the redis record Redis.delete(self.cookie_name) @@ -234,12 +213,10 @@ class UsersManager(object): self.users = model.User.query.all() logger.debug("Users are:", self.users) - class UserManager(object): def __init__(self, kw): self.user_id = kw['user_id'] logger.debug("In UserManager locals are:", pf(locals())) - #self.user = model.User.get(user_id) #logger.debug("user is:", user) self.user = model.User.query.get(self.user_id) logger.debug("user is:", self.user) @@ -253,10 +230,8 @@ class UserManager(object): logger.debug(" Confidential:", dataset.check_confidentiality()) #logger.debug(" ---> self.datasets:", self.datasets) - class RegisterUser(object): def __init__(self, kw): - self.thank_you_mode = False self.errors = [] self.user = Bunch() es = kw.get('es_connection', None) @@ -304,7 +279,6 @@ def set_password(password, user): pwfields.algorithm = "pbkdf2" pwfields.hashfunc = "sha256" - #hashfunc = getattr(hashlib, pwfields.hashfunc) # Encoding it to base64 makes storing it in json much easier pwfields.salt = base64.b64encode(os.urandom(32)) @@ -333,7 +307,6 @@ def set_password(password, user): sort_keys=True, ) - class VerificationEmail(object): template_name = "email/verification.txt" key_prefix = "verification_code" @@ -348,7 +321,6 @@ class VerificationEmail(object): ) Redis.set(key, data) - #two_days = 60 * 60 * 24 * 2 Redis.expire(key, THREE_DAYS) to = user.email_address subject = self.subject @@ -463,7 +435,6 @@ def password_reset_step2(): logger.debug("locals are:", locals()) - user = Bunch() password = request.form['password'] set_password(password, user) @@ -489,8 +460,6 @@ class DecodeUser(object): def __init__(self, code_prefix): verify_url_hmac(request.url) - #params = urlparse.parse_qs(url) - self.verification_code = request.args['code'] self.user = self.actual_get_user(code_prefix, self.verification_code) @@ -662,8 +631,6 @@ class LoginUser(object): else: import_col = "false" - #g.cookie_session.import_traits_to_user() - return self.actual_login(user, import_collections=import_col) else: @@ -691,7 +658,6 @@ class LoginUser(object): login_rec.successful = True login_rec.session_id = str(uuid.uuid4()) login_rec.assumed_by = assumed_by - #session_id = "session_id:{}".format(login_rec.session_id) session_id_signature = actual_hmac_creation(login_rec.session_id) session_id_signed = login_rec.session_id + ":" + session_id_signature logger.debug("session_id_signed:", session_id_signed) @@ -726,13 +692,12 @@ def logout(): response.set_cookie(UserSession.cookie_name, '', expires=0) return response - @app.route("/n/forgot_password", methods=['GET']) def forgot_password(): """Entry point for forgotten password""" - print("ARGS: ", request.args) + logger.debug("ARGS: ", request.args) errors = {"no-email": request.args.get("no-email")} - print("ERRORS: ", errors) + logger.debug("ERRORS: ", errors) return render_template("new_security/forgot_password.html", errors=errors) @app.route("/n/forgot_password_submit", methods=('POST',)) @@ -768,8 +733,6 @@ def super_only(): flash("You must be a superuser to access that page.", "alert-error") abort(401) - - @app.route("/manage/users") def manage_users(): super_only() @@ -810,13 +773,11 @@ def assume_identity(): assumed_by = g.user_session.user_id return LoginUser().actual_login(user, assumed_by=assumed_by) - @app.route("/n/register", methods=('GET', 'POST')) def register(): params = None errors = None - params = request.form if request.form else request.args params = params.to_dict(flat=True) es = get_elasticsearch_connection() @@ -833,7 +794,6 @@ def register(): return render_template("new_security/register_user.html", values=params, errors=errors) - ################################# Sign and unsign ##################################### def url_for_hmac(endpoint, **values): @@ -852,7 +812,6 @@ def data_hmac(stringy): """Takes arbitray data string and appends :hmac so we know data hasn't been tampered with""" return stringy + ":" + actual_hmac_creation(stringy) - def verify_url_hmac(url): """Pass in a url that was created with url_hmac and this assures it hasn't been tampered with""" logger.debug("url passed in to verify is:", url) @@ -887,13 +846,6 @@ app.jinja_env.globals.update(url_for_hmac=url_for_hmac, ####################################################################################### -# def send_email(to, subject, body): -# msg = json.dumps(dict(From="no-reply@genenetwork.org", -# To=to, -# Subject=subject, -# Body=body)) -# Redis.rpush("mail_queue", msg) - def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not 'UNKNOWN' TLS is used @@ -918,10 +870,4 @@ def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): class GroupsManager(object): def __init__(self, kw): - self.datasets = create_datasets_list() - - -class RolesManager(object): - def __init__(self): - self.roles = model.Role.query.all() - logger.debug("Roles are:", self.roles) + self.datasets = create_datasets_list()
\ No newline at end of file diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 3c2cca94..49b47123 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -57,7 +57,6 @@ from utility import temp_data from utility.tools import SQL_URI,TEMPDIR,USE_REDIS,USE_GN_SERVER,GN_SERVER_URL,GN_VERSION,JS_TWITTER_POST_FETCHER_PATH,JS_GUIX_PATH, CSS_PATH from utility.helper_functions import get_species_groups -from base import webqtlFormData from base.webqtlConfig import GENERATED_IMAGE_DIR from utility.benchmark import Bench @@ -90,11 +89,6 @@ def shutdown_session(exception=None): db_session.remove() g.db = None -#@app.before_request -#def trace_it(): -# from wqflask import tracer -# tracer.turn_on() - @app.errorhandler(Exception) def handle_bad_request(e): err_msg = str(e) @@ -163,62 +157,37 @@ def css(filename): def twitter(filename): return send_from_directory(JS_TWITTER_POST_FETCHER_PATH, filename) -#@app.route("/data_sharing") -#def data_sharing_page(): -# logger.info("In data_sharing") -# fd = webqtlFormData.webqtlFormData(request.args) -# logger.info("1Have fd") -# sharingInfoObject = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None) -# info, htmlfilelist = sharingInfoObject.getBody(infoupdate="") -# logger.info("type(htmlfilelist):", type(htmlfilelist)) -# htmlfilelist = htmlfilelist.encode("utf-8") -# #template_vars = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None) -# logger.info("1 Made it to rendering") -# return render_template("data_sharing.html", -# info=info, -# htmlfilelist=htmlfilelist) - - @app.route("/search", methods=('GET',)) def search_page(): logger.info("in search_page") logger.info(request.url) - if 'info_database' in request.args: - logger.info("Going to sharing_info_page") - template_vars = sharing_info_page() - if template_vars.redirect_url: - logger.info("Going to redirect") - return flask.redirect(template_vars.redirect_url) - else: - return render_template("data_sharing.html", **template_vars.__dict__) + result = None + if USE_REDIS: + with Bench("Trying Redis cache"): + key = "search_results:v1:" + json.dumps(request.args, sort_keys=True) + logger.debug("key is:", pf(key)) + result = Redis.get(key) + if result: + logger.info("Redis cache hit on search results!") + result = pickle.loads(result) else: - result = None - if USE_REDIS: - with Bench("Trying Redis cache"): - key = "search_results:v1:" + json.dumps(request.args, sort_keys=True) - logger.debug("key is:", pf(key)) - result = Redis.get(key) - if result: - logger.info("Redis cache hit on search results!") - result = pickle.loads(result) - else: - logger.info("Skipping Redis cache (USE_REDIS=False)") + logger.info("Skipping Redis cache (USE_REDIS=False)") - logger.info("request.args is", request.args) - the_search = search_results.SearchResultPage(request.args) - result = the_search.__dict__ - valid_search = result['search_term_exists'] + logger.info("request.args is", request.args) + the_search = search_results.SearchResultPage(request.args) + result = the_search.__dict__ + valid_search = result['search_term_exists'] - logger.debugf("result", result) + logger.debugf("result", result) - if USE_REDIS and valid_search: - Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) - Redis.expire(key, 60*60) + if USE_REDIS and valid_search: + Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) + Redis.expire(key, 60*60) - if valid_search: - return render_template("search_result_page.html", **result) - else: - return render_template("search_error.html") + if valid_search: + return render_template("search_result_page.html", **result) + else: + return render_template("search_error.html") @app.route("/gsearch", methods=('GET',)) def gsearchact(): @@ -741,7 +710,6 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - #fd = webqtlFormData.webqtlFormData(request.form) template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) @@ -777,15 +745,6 @@ def submit_bnw(): template_vars = get_bnw_input(request.form) return render_template("empty_collection.html", **{'tool':'Correlation Matrix'}) -# Todo: Can we simplify this? -Sam -def sharing_info_page(): - """Info page displayed when the user clicks the "Info" button next to the dataset selection""" - logger.info("In sharing_info_page") - logger.info(request.url) - fd = webqtlFormData.webqtlFormData(request.args) - template_vars = SharingInfoPage.SharingInfoPage(fd) - return template_vars - # Take this out or secure it before putting into production @app.route("/get_temp_data") def get_temp_data(): |