From 183f9a0ba19b6fcdf1475285af1bb1fcd45a9442 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Fri, 20 Sep 2013 17:20:52 -0500 Subject: Tissue correlation results work for sample r/rho correlation page and are written to the template --- misc/gn_installation_notes.txt | 14 +- wqflask/base/mrna_assay_tissue_data.py | 152 +++++++++++---------- .../wqflask/correlation/correlation_functions.py | 12 +- wqflask/wqflask/correlation/show_corr_results.py | 65 ++++++--- wqflask/wqflask/templates/correlation_page.html | 9 +- 5 files changed, 157 insertions(+), 95 deletions(-) diff --git a/misc/gn_installation_notes.txt b/misc/gn_installation_notes.txt index 6329586f..a73e7d4f 100644 --- a/misc/gn_installation_notes.txt +++ b/misc/gn_installation_notes.txt @@ -272,6 +272,12 @@ sudo apt-get install r-base-dev =========================================== +Installing rpy2 + +pip install rpy2 + +=========================================== + Install Parallel Python (pp) wget http://www.parallelpython.com/downloads/pp/pp-1.6.3.tar.gz @@ -303,7 +309,13 @@ To get server running: !If having seemingly inexplicable problems with imports, make sure I've started the environment! Start up virtual environment: -source ~/ve27/bin/activate +source ~/ve27/bin/activate============== + +Install requests + +pip install requests + +======================= To set WQFLASK_SETTINGS environment variable: export WQFLASK_SETTINGS=~/zach_settings.py (or wherever file is located) diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 8ae71858..7eb07028 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -4,11 +4,13 @@ import collections from flask import g -from utility import dbtools -from uitility import Bunch +from utility import db_tools +from utility import Bunch from MySQLdb import escape_string as escape +from pprint import pformat as pf + class MrnaAssayTissueData(object): def __init__(self, gene_symbols=None): @@ -35,14 +37,15 @@ class MrnaAssayTissueData(object): # Note that inner join is necessary in this query to get distinct record in one symbol group # with highest mean value # Due to the limit size of TissueProbeSetFreezeId table in DB, - # performance of inner join is acceptable. + # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) + print("len(gene_symbols): ", len(gene_symbols)) if len(gene_symbols) == 0: query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; ''' else: - in_clause = dbtools.create_in_clause(gene_symbols) + in_clause = db_tools.create_in_clause(gene_symbols) query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol @@ -51,17 +54,19 @@ class MrnaAssayTissueData(object): results = g.db.execute(query).fetchall() for result in results: - symbol = item[0] - gene_symbols.append(symbol) - symbol = symbol.lower() - - self.data[symbol].gene_id = result.GeneId - self.data[symbol].data_id = result.DataId - self.data[symbol].chr = result.Chr - self.data[symbol].mb = result.Mb - self.data[symbol].description = result.description - self.data[symbol].probe_target_description = result.Probe_Target_Description + symbol = result[0] + if symbol in gene_symbols: + #gene_symbols.append(symbol) + symbol = symbol.lower() + + self.data[symbol].gene_id = result.GeneId + self.data[symbol].data_id = result.DataId + self.data[symbol].chr = result.Chr + self.data[symbol].mb = result.Mb + self.data[symbol].description = result.description + self.data[symbol].probe_target_description = result.Probe_Target_Description + #print("self.data: ", pf(self.data)) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -70,65 +75,72 @@ class MrnaAssayTissueData(object): #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). #Attention! All keys are lower case! ########################################################################### - def get_symbol_value_pairs(self): - - id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data] - - symbol_value_pairs = {} - value_list=[] - query = """SELECT value, id - FROM TissueProbeSetData - WHERE Id IN {}""".format(create_in_clause(id_list)) - - try : - results = g.db.execute(query).fetchall() - for result in results: - value_list.append(result.value) - symbol_value_pairs[symbol] = value_list - except: - symbol_value_pairs[symbol] = None - - #for symbol in symbol_list: - # if tissue_data.has_key(symbol): - # data_id = tissue_data[symbol].data_id - # - # query = """select value, id - # from TissueProbeSetData - # where Id={}""".format(escape(data_id)) - # try : - # results = g.db.execute(query).fetchall() - # for item in results: - # item = item[0] - # value_list.append(item) - # symbol_value_pairs[symbol] = value_list - # value_list=[] - # except: - # symbol_value_pairs[symbol] = None + def get_symbol_values_pairs(self): + id_list = [self.data[symbol].data_id for symbol in self.data] + + symbol_values_dict = {} + + query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value + FROM TissueProbeSetXRef, TissueProbeSetData + WHERE TissueProbeSetData.Id IN {} and + TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + + results = g.db.execute(query).fetchall() + for result in results: + if result.Symbol.lower() not in symbol_values_dict: + symbol_values_dict[result.Symbol.lower()] = [result.value] + else: + symbol_values_dict[result.Symbol.lower()].append(result.value) + + #for symbol in self.data: + # data_id = self.data[symbol].data_id + # symbol_values_dict[symbol] = self.get_tissue_values(data_id) + - return symbol_value_pairs + return symbol_values_dict - ######################################################################################################## - #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol - #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. - # key is symbol, value is one list of expression values of one probeSet. - #function: wrapper function for getSymbolValuePairDict function - # build gene symbol list if necessary, cut it into small lists if necessary, - # then call getSymbolValuePairDict function and merge the results. - ######################################################################################################## - def get_trait_symbol_and_tissue_values(symbol_list=None): - tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) + #def get_tissue_values(self, data_id): + # """Gets the tissue values for a particular gene""" + # + # tissue_values=[] + # + # query = """SELECT value, id + # FROM TissueProbeSetData + # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id)) + # + # #try : + # results = g.db.execute(query).fetchall() + # for result in results: + # tissue_values.append(result.value) + # #symbol_values_dict[symbol] = value_list + # #except: + # # symbol_values_pairs[symbol] = None + # + # return tissue_values - #symbolList, - #geneIdDict, - #dataIdDict, - #ChrDict, - #MbDict, - #descDict, - #pTargetDescDict = getTissueProbeSetXRefInfo( - # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - if len(tissue_data.gene_symbols): - return get_symbol_value_pairs(tissue_data) +######################################################################################################## +#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# key is symbol, value is one list of expression values of one probeSet. +#function: wrapper function for getSymbolValuePairDict function +# build gene symbol list if necessary, cut it into small lists if necessary, +# then call getSymbolValuePairDict function and merge the results. +######################################################################################################## + +#def get_trait_symbol_and_tissue_values(symbol_list=None): +# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) +# +# #symbolList, +# #geneIdDict, +# #dataIdDict, +# #ChrDict, +# #MbDict, +# #descDict, +# #pTargetDescDict = getTissueProbeSetXRefInfo( +# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) +# +# if len(tissue_data.gene_symbols): +# return get_symbol_values_pairs(tissue_data) diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index 56f66810..84d47bb5 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -27,7 +27,7 @@ from __future__ import absolute_import, print_function, division import math -#import rpy2.robjects +import rpy2.robjects import pp import string @@ -494,7 +494,7 @@ pcor.rec <- function(x,y,z,method="p",na.rm=T){ #XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait #XZ: The primaryTrait and targetTrait should have executed retrieveData function -def calZeroOrderCorr (primaryTrait, targetTrait, method='pearson'): +def calZeroOrderCorr(primaryTrait, targetTrait, method='pearson'): #primaryTrait.retrieveData() @@ -560,7 +560,7 @@ def calZeroOrderCorr (primaryTrait, targetTrait, method='pearson'): #the same tissue order ##################################################################################### -def calZeroOrderCorrForTiss (primaryValue=[], targetValue=[], method='pearson'): +def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pearson'): R_primary = rpy2.robjects.FloatVector(range(len(primaryValue))) N = len(primaryValue) @@ -809,6 +809,9 @@ def get_trait_symbol_and_tissue_values(symbol_list=None): tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) + if len(tissue_data.gene_symbols): + return tissue_data.get_symbol_values_pairs() + #symbolList, #geneIdDict, #dataIdDict, @@ -818,9 +821,6 @@ def get_trait_symbol_and_tissue_values(symbol_list=None): #pTargetDescDict = getTissueProbeSetXRefInfo( # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - if len(tissue_data.gene_symbols): - return get_symbol_value_pairs(tissue_data) - #limit_num=1000 #count = len(symbol_list) # diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index b17e1db1..b9d009af 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -105,7 +105,7 @@ class CorrelationResults(object): self.sample_data = {} self.corr_method = start_vars['corr_sample_method'] - self.return_number = 500 + self.return_number = 50 #The two if statements below append samples to the sample list based upon whether the user #rselected Primary Samples Only, Other Samples Only, or All Samples @@ -158,15 +158,27 @@ class CorrelationResults(object): for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True) + + print("gene symbol: ", trait_object.symbol) + trait_object.sample_r = self.correlation_data[trait][0] trait_object.sample_p = self.correlation_data[trait][1] trait_object.num_overlap = self.correlation_data[trait][2] + #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, + #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) + + + # Set some sane defaults - trait_object.tissue_corr = None - trait_object.tissue_pvalue = None + trait_object.tissue_corr = 0 + trait_object.tissue_pvalue = 0 self.correlation_results.append(trait_object) + + self.do_tissue_correlation_by_list() + + print("self.correlation_results: ", pf(self.correlation_results)) @@ -183,7 +195,7 @@ class CorrelationResults(object): # mb = trait_object.mb # ) # if trait_object.mean: - # trait_info[mean] = trait_object.mean + #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias, # trait_info[mean] = trait_object.mean # if hasattr(trait_object, 'mean'): # trait_info[mean] = trait_object.mean # if hasattr(trait_object, 'lrs'): @@ -197,7 +209,8 @@ class CorrelationResults(object): # correlation = float(self.correlation_data[trait][0]), # p_value = float(self.correlation_data[trait][1]), # symbol = trait_object.symbol, - # alias = trait_object.alias, + # alias = trai + #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias, # description = trait_object.description, # chromosome = trait_object.chr, # mb = trait_object.mb @@ -637,7 +650,15 @@ class CorrelationResults(object): for entry in results: trait_name, tissue_corr, tissue_pvalue = entry tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue) - + #symbolList, + #geneIdDict, + #dataIdDict, + #ChrDict, + #MbDict, + #descDict, + #pTargetDescDict = getTissueProbeSetXRefInfo( + # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) + g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table))) return tissue_corr_dict @@ -944,13 +965,17 @@ class CorrelationResults(object): return (symbolCorrDict, symbolPvalueDict) - def do_tissue_correlation_by_list(self, tissue_dataset_id): + def do_tissue_correlation_by_list(self, tissue_dataset_id=1): + """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" - trait_symbol_and_values = correlation_functions.get_trait_symbol_and_tissue_values( - gene_name_list = [self.this_trait.symbol]) + #Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list = [self.this_trait.symbol]) + + print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict)) - if self.this_trait.symbol.lower() in trait_symbol_and_values: - primary_trait_value = trait_symbol_and_values[self.this_trait_symbol.lower()] + if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] #gene_symbol_list = [] # @@ -960,19 +985,25 @@ class CorrelationResults(object): gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] - symbol_value_dict = correlation_functions.get_trait_gene_symbol_and_tissue_values( - gene_symbol_list=gene_symbol_list) + corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=gene_symbol_list) + + print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) for trait in self.correlation_results: - if trait.symbol and trait.symbol.lower() in symbol_value_dict: - this_trait_value = symbol_value_dict[trait.symbol.lower()] + if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] - result = correlation_functions.calZeroOrderCorrForTiss(primary_trait_value, - this_trait_value, + result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, + this_trait_tissue_values, self.corr_method) trait.tissue_corr = result[0] trait.tissue_pvalue = result[2] + + #print("trait.tissue_corr / pvalue: ", str(trait.tissue_corr) + " :: " + str(trait.tissue_pvalue)) + + # else: # trait.tissue_corr = None # trait.tissue_pvalue = None diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index 53b12545..7082dbf2 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -28,10 +28,15 @@