From 183f9a0ba19b6fcdf1475285af1bb1fcd45a9442 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Fri, 20 Sep 2013 17:20:52 -0500 Subject: Tissue correlation results work for sample r/rho correlation page and are written to the template --- wqflask/base/mrna_assay_tissue_data.py | 152 ++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 70 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 8ae71858..7eb07028 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -4,11 +4,13 @@ import collections from flask import g -from utility import dbtools -from uitility import Bunch +from utility import db_tools +from utility import Bunch from MySQLdb import escape_string as escape +from pprint import pformat as pf + class MrnaAssayTissueData(object): def __init__(self, gene_symbols=None): @@ -35,14 +37,15 @@ class MrnaAssayTissueData(object): # Note that inner join is necessary in this query to get distinct record in one symbol group # with highest mean value # Due to the limit size of TissueProbeSetFreezeId table in DB, - # performance of inner join is acceptable. + # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) + print("len(gene_symbols): ", len(gene_symbols)) if len(gene_symbols) == 0: query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; ''' else: - in_clause = dbtools.create_in_clause(gene_symbols) + in_clause = db_tools.create_in_clause(gene_symbols) query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol @@ -51,17 +54,19 @@ class MrnaAssayTissueData(object): results = g.db.execute(query).fetchall() for result in results: - symbol = item[0] - gene_symbols.append(symbol) - symbol = symbol.lower() - - self.data[symbol].gene_id = result.GeneId - self.data[symbol].data_id = result.DataId - self.data[symbol].chr = result.Chr - self.data[symbol].mb = result.Mb - self.data[symbol].description = result.description - self.data[symbol].probe_target_description = result.Probe_Target_Description + symbol = result[0] + if symbol in gene_symbols: + #gene_symbols.append(symbol) + symbol = symbol.lower() + + self.data[symbol].gene_id = result.GeneId + self.data[symbol].data_id = result.DataId + self.data[symbol].chr = result.Chr + self.data[symbol].mb = result.Mb + self.data[symbol].description = result.description + self.data[symbol].probe_target_description = result.Probe_Target_Description + #print("self.data: ", pf(self.data)) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -70,65 +75,72 @@ class MrnaAssayTissueData(object): #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). #Attention! All keys are lower case! ########################################################################### - def get_symbol_value_pairs(self): - - id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data] - - symbol_value_pairs = {} - value_list=[] - query = """SELECT value, id - FROM TissueProbeSetData - WHERE Id IN {}""".format(create_in_clause(id_list)) - - try : - results = g.db.execute(query).fetchall() - for result in results: - value_list.append(result.value) - symbol_value_pairs[symbol] = value_list - except: - symbol_value_pairs[symbol] = None - - #for symbol in symbol_list: - # if tissue_data.has_key(symbol): - # data_id = tissue_data[symbol].data_id - # - # query = """select value, id - # from TissueProbeSetData - # where Id={}""".format(escape(data_id)) - # try : - # results = g.db.execute(query).fetchall() - # for item in results: - # item = item[0] - # value_list.append(item) - # symbol_value_pairs[symbol] = value_list - # value_list=[] - # except: - # symbol_value_pairs[symbol] = None + def get_symbol_values_pairs(self): + id_list = [self.data[symbol].data_id for symbol in self.data] + + symbol_values_dict = {} + + query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value + FROM TissueProbeSetXRef, TissueProbeSetData + WHERE TissueProbeSetData.Id IN {} and + TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + + results = g.db.execute(query).fetchall() + for result in results: + if result.Symbol.lower() not in symbol_values_dict: + symbol_values_dict[result.Symbol.lower()] = [result.value] + else: + symbol_values_dict[result.Symbol.lower()].append(result.value) + + #for symbol in self.data: + # data_id = self.data[symbol].data_id + # symbol_values_dict[symbol] = self.get_tissue_values(data_id) + - return symbol_value_pairs + return symbol_values_dict - ######################################################################################################## - #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol - #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. - # key is symbol, value is one list of expression values of one probeSet. - #function: wrapper function for getSymbolValuePairDict function - # build gene symbol list if necessary, cut it into small lists if necessary, - # then call getSymbolValuePairDict function and merge the results. - ######################################################################################################## - def get_trait_symbol_and_tissue_values(symbol_list=None): - tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) + #def get_tissue_values(self, data_id): + # """Gets the tissue values for a particular gene""" + # + # tissue_values=[] + # + # query = """SELECT value, id + # FROM TissueProbeSetData + # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id)) + # + # #try : + # results = g.db.execute(query).fetchall() + # for result in results: + # tissue_values.append(result.value) + # #symbol_values_dict[symbol] = value_list + # #except: + # # symbol_values_pairs[symbol] = None + # + # return tissue_values - #symbolList, - #geneIdDict, - #dataIdDict, - #ChrDict, - #MbDict, - #descDict, - #pTargetDescDict = getTissueProbeSetXRefInfo( - # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - if len(tissue_data.gene_symbols): - return get_symbol_value_pairs(tissue_data) +######################################################################################################## +#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# key is symbol, value is one list of expression values of one probeSet. +#function: wrapper function for getSymbolValuePairDict function +# build gene symbol list if necessary, cut it into small lists if necessary, +# then call getSymbolValuePairDict function and merge the results. +######################################################################################################## + +#def get_trait_symbol_and_tissue_values(symbol_list=None): +# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) +# +# #symbolList, +# #geneIdDict, +# #dataIdDict, +# #ChrDict, +# #MbDict, +# #descDict, +# #pTargetDescDict = getTissueProbeSetXRefInfo( +# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) +# +# if len(tissue_data.gene_symbols): +# return get_symbol_values_pairs(tissue_data) -- cgit v1.2.3 From 081f4f222a261c0d84bfb266aa4a32d6d62cab85 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 24 Sep 2013 18:08:23 -0500 Subject: Did some work towards doing the tissue correlation for all traits in a dataset (in order to sort by tissue correlation instead of sample correlation). --- wqflask/base/data_set.py | 14 +- wqflask/maintenance/quick_search_table.py | 6 +- .../wqflask/correlation/correlation_functions.py | 2 - wqflask/wqflask/correlation/show_corr_results.py | 545 +++++++++++---------- .../show_trait_calculate_correlations.html | 2 +- 5 files changed, 309 insertions(+), 260 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 96e04df0..5d21c901 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1055,7 +1055,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() return results[0] - def retrieve_sample_data(self, trait): query = """ @@ -1077,6 +1076,19 @@ class MrnaAssayDataSet(DataSet): results = g.db.execute(query).fetchall() return results + def retrieve_gene_symbols(self): + query = """ + select ProbeSet.Name, ProbeSet.Symbol + from ProbeSet,ProbeSetXRef + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSetXRef.ProbeSetId=ProbeSet.Id; + """ % (self.id) + results = g.db.execute(query).fetchall() + symbol_dict = {} + for item in results: + symbol_dict[item[0]] = item[1] + return symbol_dict + class TempDataSet(DataSet): '''Temporary user-generated data set''' diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 9cd792ef..eef61857 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -11,10 +11,10 @@ each trait, its dataset, and several columns determined by its trait type (pheno """ -from __future__ import print_function, division, absolute_import +from __future__ import absolute_import, division, print_function -import sys -sys.path.append("../../..") +#import sys +#sys.path.append("../../..") import simplejson as json diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index 84d47bb5..da5c3197 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -805,8 +805,6 @@ def get_symbol_value_pairs(tissue_data): ######################################################################################################## def get_trait_symbol_and_tissue_values(symbol_list=None): - SymbolValuePairDict={} - tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) if len(tissue_data.gene_symbols): diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index b9d009af..c6bc5b2a 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -95,8 +95,7 @@ class CorrelationResults(object): #self.this_trait = GeneralTrait(dataset=self.dataset.name, # name=start_vars['trait_id'], - # cellid=None) - #print("start_vars: ", pf(start_vars)) + # cellid=None) with Bench("Doing correlations"): helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() @@ -104,6 +103,7 @@ class CorrelationResults(object): corr_samples_group = start_vars['corr_samples_group'] self.sample_data = {} + self.corr_type = start_vars['corr_type'] self.corr_method = start_vars['corr_sample_method'] self.return_number = 50 @@ -127,36 +127,61 @@ class CorrelationResults(object): self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() + self.correlation_results = [] self.correlation_data = {} - for trait, values in self.target_dataset.trait_data.iteritems(): - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(self.target_dataset.samplelist): - if sample in self.sample_data: - sample_value = self.sample_data[sample] - target_sample_value = values[index] - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( - this_trait_vals, target_vals) - - if self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] - - self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), - key=lambda t: -abs(t[1][0]))) - self.correlation_results = [] - - #self.correlation_data_slice = collections.OrderedDict() + if self.corr_type == "tissue": + trait_symbol_dict = self.dataset.retrieve_gene_symbols() + trait_symbols = trait_symbol_dict.values + + tissue_corr_data = self.do_tissue_corr_for_all_traits(gene_symbol_list=trait_symbols) + + for trait in tissue_corr_data.keys()[:self.return_number]: + this_trait_vals = [] + target_vals = [] + for index, sample in enumerate(self.target_dataset.samplelist): + if sample in self.sample_data: + sample_value = self.sample_data[sample] + target_sample_value = self.target_dataset.trait_data[trait][index] + this_trait_vals.append(sample_value) + target_vals.append(target_sample_value) + + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) + + if self.corr_method == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + + elif self.corr_type == "sample": + for trait, values in self.target_dataset.trait_data.iteritems(): + this_trait_vals = [] + target_vals = [] + for index, sample in enumerate(self.target_dataset.samplelist): + if sample in self.sample_data: + sample_value = self.sample_data[sample] + target_sample_value = values[index] + this_trait_vals.append(sample_value) + target_vals.append(target_sample_value) + + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) + + if self.corr_method == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + + self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), + key=lambda t: -abs(t[1][0]))) - for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): + for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True) print("gene symbol: ", trait_object.symbol) @@ -168,63 +193,21 @@ class CorrelationResults(object): #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) - - - # Set some sane defaults - trait_object.tissue_corr = 0 - trait_object.tissue_pvalue = 0 - + if self.corr_method != "tissue": + # Set some sane defaults + trait_object.tissue_corr = 0 + trait_object.tissue_pvalue = 0 + else: + trait_object.tissue_corr = tissue_corr_data[trait][1] + trait_object.tissue_pvalue = tissue_corr_data[trait][2] + self.correlation_results.append(trait_object) - self.do_tissue_correlation_by_list() + if self.corr_method != "tissue": + self.do_tissue_correlation_for_trait_list() print("self.correlation_results: ", pf(self.correlation_results)) - - - #self.correlation_data_slice[trait] = self.correlation_data[trait] - #self.correlation_data_slice[trait].append(trait_object) - #if self.dataset.type == 'ProbeSet': - # trait_info = collections.OrderedDict( - # correlation = float(self.correlation_data[trait][0]), - # p_value = float(self.correlation_data[trait][1]), - # symbol = trait_object.symbol, - # alias = trait_object.alias, - # description = trait_object.description, - # chromosome = trait_object.chr, - # mb = trait_object.mb - # ) - # if trait_object.mean: - #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias, # trait_info[mean] = trait_object.mean - # if hasattr(trait_object, 'mean'): - # trait_info[mean] = trait_object.mean - # if hasattr(trait_object, 'lrs'): - # trait_info[lrs] = trait_object.lrs - # if hasattr(trait_object, 'locus_chr'): - # trait_info[locus_chr] = trait_object.locus_chr - # if hasattr(trait_object, 'locus_mb'): - # trait_info[locus_mb] = trait_object.locus_mb - #elif self.dataset.type == 'Geno': - # trait_info = collections.OrderedDict( - # correlation = float(self.correlation_data[trait][0]), - # p_value = float(self.correlation_data[trait][1]), - # symbol = trait_object.symbol, - # alias = trai - #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias, - # description = trait_object.description, - # chromosome = trait_object.chr, - # mb = trait_object.mb - # ) - #else: # 'Publish' - # trait_info = collections.OrderedDict( - # correlation = float(self.correlation_data[trait][0]), - # p_value = float(self.correlation_data[trait][1]), - # symbol = trait_object.symbol, - # alias = trait_object.alias, - # description = trait_object.description, - # chromosome = trait_object.chr, - # mb = trait_object.mb - # ) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset @@ -278,6 +261,210 @@ class CorrelationResults(object): ############################################################################################################################################ + def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1): + """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" + + #Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list = [self.this_trait.symbol]) + + print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict)) + + if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + + #gene_symbol_list = [] + # + #for trait in self.correlation_results: + # if hasattr(trait, 'symbol'): + # gene_symbol_list.append(trait.symbol) + + gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] + + corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=gene_symbol_list) + + print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) + + for trait in self.correlation_results: + if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] + + result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, + this_trait_tissue_values, + self.corr_method) + + trait.tissue_corr = result[0] + trait.tissue_pvalue = result[2] + + # else: + # trait.tissue_corr = None + # trait.tissue_pvalue = None + #else: + # for trait in self.correlation_results: + # trait.tissue_corr = None + # trait.tissue_pvalue = None + + #return self.correlation_results + + + def do_tissue_corr_for_all_traits(self, trait_symbols, tissue_dataset_id=1): + #Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list = [self.this_trait.symbol]) + + correlation_data = {} + if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + + corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=trait_symbols.values) + + print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) + + for trait, symbol in trait_symbols.iteritems(): + if symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + + result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, + this_trait_tissue_values, + self.corr_method) + + correlation_results[trait] = [symbol, result[0], result[2]] + + correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), + key=lambda t: -abs(t[1][1]))) + + return correlation_data + + + + def do_tissue_corr_for_all_traits_2(self): + """Comments Possibly Out of Date!!!!! + + Uses get_temp_tissue_corr_table to generate table of tissue correlations + + This function then gathers that data and pairs it with the TraitID string. + Takes as its arguments a formdata instance, and a dataset instance. + Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) + for the requested correlation + + Used when the user selects the tissue correlation method; i.e. not for the + column that is appended to all probeset trait correlation tables + + """ + + # table name string + temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB, + method=method) + + query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol + WHERE ProbeSetFreeze.Name = '{}' + and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSet.Symbol IS NOT NULL + and {}.Correlation IS NOT NULL""".format(dataset.mescape( + temp_table, temp_table, temp_table, temp_table, + self.dataset.name, temp_table)) + + results = g.db.execute(query).fetchall() + + tissue_corr_dict = {} + + for entry in results: + trait_name, tissue_corr, tissue_pvalue = entry + tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue) + #symbolList, + #geneIdDict, + #dataIdDict, + #ChrDict, + #MbDict, + #descDict, + #pTargetDescDict = getTissueProbeSetXRefInfo( + # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) + + g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table))) + + return tissue_corr_dict + + + #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2 + #XZ, 09/24/2008: Note that the correlation value can be negative. + def get_temp_tissue_corr_table(self, + tissue_probesetfreeze_id=0, + method="", + return_number=0): + + + def cmp_tisscorr_absolute_value(A, B): + try: + if abs(A[1]) < abs(B[1]): return 1 + elif abs(A[1]) == abs(B[1]): + return 0 + else: return -1 + except: + return 0 + + symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues( + tissue_dataset_id=TISSUE_MOUSE_DB) + + symbol_corr_list = symbol_corr_dict.items() + + symbol_corr_list.sort(cmp_tisscorr_absolute_value) + symbol_corr_list = symbol_corr_list[0 : 2*return_number] + + tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE") + + q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name + self.cursor.execute(q1) + + for one_pair in symbol_corr_list: + one_symbol = one_pair[0] + one_corr = one_pair[1] + one_p_value = symbol_pvalue_dict[one_symbol] + + self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) ) + + return tmp_table_name + + + def calculate_corr_for_all_tissues(self, tissue_dataset_id=None): + + symbol_corr_dict = {} + symbol_pvalue_dict = {} + + primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( + GeneNameLst=[self.this_trait.symbol], + TissueProbeSetFreezeId=tissue_dataset_id) + primary_trait_value = primary_trait_symbol_value_dict.values()[0] + + symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( + gene_name_list=[], + tissue_dataset_id=tissue_dataset_id) + + symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( + primaryTraitValue, + SymbolValueDict, + method=self.corr_method) + #else: + # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( + # primaryTraitValue, + # SymbolValueDict) + + return (symbolCorrDict, symbolPvalueDict) + + ##XZ, 12/16/2008: the input geneid is of mouse type + #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): + # q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) + # self.cursor.execute(q) + # try: + # x = self.cursor.fetchone() + # if x: return True + # else: raise + # except: return False + def get_all_dataset_data(self): @@ -353,6 +540,8 @@ class CorrelationResults(object): return mouse_geneid + + ##XZ, 12/16/2008: the input geneid is of mouse type #def checkForLitInfo(self,geneId): # q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId @@ -364,16 +553,6 @@ class CorrelationResults(object): # except: return False - ##XZ, 12/16/2008: the input geneid is of mouse type - #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): - # q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) - # self.cursor.execute(q) - # try: - # x = self.cursor.fetchone() - # if x: return True - # else: raise - # except: return False - def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId): @@ -545,46 +724,6 @@ class CorrelationResults(object): - #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2 - #XZ, 09/24/2008: Note that the correlation value can be negative. - def get_temp_tissue_corr_table(self, - tissue_probesetfreeze_id=0, - method="", - return_number=0): - - - def cmp_tisscorr_absolute_value(A, B): - try: - if abs(A[1]) < abs(B[1]): return 1 - elif abs(A[1]) == abs(B[1]): - return 0 - else: return -1 - except: - return 0 - - symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues( - tissue_dataset_id=TISSUE_MOUSE_DB) - - symbol_corr_list = symbol_corr_dict.items() - - symbol_corr_list.sort(cmp_tisscorr_absolute_value) - symbol_corr_list = symbol_corr_list[0 : 2*return_number] - - tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE") - - q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name - self.cursor.execute(q1) - - for one_pair in symbol_corr_list: - one_symbol = one_pair[0] - one_corr = one_pair[1] - one_p_value = symbol_pvalue_dict[one_symbol] - - self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) ) - - return tmp_table_name - - #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it. def fetchLitCorrelations(self, species, GeneId, db, returnNumber): ### Used to generate Lit Correlations when calculations are done from text file. dcrowell August 2008 """Uses getTempLiteratureTable to generate table of literatire correlations. This function then gathers that data and @@ -612,57 +751,6 @@ class CorrelationResults(object): return litCorrDict - def fetch_tissue_correlations(self): - """Comments Possibly Out of Date!!!!! - - - Uses getTempTissueCorrTable to generate table of tissue correlations - - This function then gathers that data and pairs it with the TraitID string. - Takes as its arguments a formdata instance, and a database instance. - Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) - for the requested correlation - - Used when the user selects the tissue correlation method; i.e. not for the - column that is appended to all probeset trait correlation tables - - """ - - # table name string - temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB, - method=method) - - query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue - FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol - WHERE ProbeSetFreeze.Name = '{}' - and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSet.Symbol IS NOT NULL - and {}.Correlation IS NOT NULL""".format(dataset.mescape( - temp_table, temp_table, temp_table, temp_table, - self.dataset.name, temp_table)) - - results = g.db.execute(query).fetchall() - - tissue_corr_dict = {} - - for entry in results: - trait_name, tissue_corr, tissue_pvalue = entry - tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue) - #symbolList, - #geneIdDict, - #dataIdDict, - #ChrDict, - #MbDict, - #descDict, - #pTargetDescDict = getTissueProbeSetXRefInfo( - # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table))) - - return tissue_corr_dict - def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None): @@ -819,7 +907,30 @@ class CorrelationResults(object): allcorrelations.append( one_traitinfo ) _log.info("Appending the results") + def calculate_corr_for_all_tissues(self, tissue_dataset_id=None): + + symbol_corr_dict = {} + symbol_pvalue_dict = {} + primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( + GeneNameLst=[self.this_trait.symbol], + TissueProbeSetFreezeId=tissue_dataset_id) + primary_trait_value = primary_trait_symbol_value_dict.values()[0] + + symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( + gene_name_list=[], + tissue_dataset_id=tissue_dataset_id) + + symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( + primaryTraitValue, + SymbolValueDict, + method=self.corr_method) + #else: + # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( + # primaryTraitValue, + # SymbolValueDict) + + return (symbolCorrDict, symbolPvalueDict) datasetFile.close() totalTraits = len(allcorrelations) _log.info("Done correlating using the fast method") @@ -939,78 +1050,6 @@ class CorrelationResults(object): return trait_list """ - def calculate_corr_for_all_tissues(self, tissue_dataset_id=None): - symbol_corr_dict = {} - symbol_pvalue_dict = {} - primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( - GeneNameLst=[self.this_trait.symbol], - TissueProbeSetFreezeId=tissue_dataset_id) - primary_trait_value = primary_trait_symbol_value_dict.values()[0] - - symbol_value_dict = correlation_functions.make_gene_tissue_value_dict( - gene_name_list=[], - tissue_dataset_id=tissue_dataset_id) - - symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( - primaryTraitValue, - SymbolValueDict, - method=self.corr_method) - #else: - # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr( - # primaryTraitValue, - # SymbolValueDict) - - return (symbolCorrDict, symbolPvalueDict) - - - def do_tissue_correlation_by_list(self, tissue_dataset_id=1): - """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" - - #Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) - - print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict)) - - if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - - #gene_symbol_list = [] - # - #for trait in self.correlation_results: - # if hasattr(trait, 'symbol'): - # gene_symbol_list.append(trait.symbol) - - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] - - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=gene_symbol_list) - - print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) - - for trait in self.correlation_results: - if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] - - result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) - - trait.tissue_corr = result[0] - trait.tissue_pvalue = result[2] - - #print("trait.tissue_corr / pvalue: ", str(trait.tissue_corr) + " :: " + str(trait.tissue_pvalue)) - - - # else: - # trait.tissue_corr = None - # trait.tissue_pvalue = None - #else: - # for trait in self.correlation_results: - # trait.tissue_corr = None - # trait.tissue_pvalue = None - - #return self.correlation_results diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 12a064c0..73502392 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -5,7 +5,7 @@
- -- cgit v1.2.3 From 46624ca0058dcf2014b7eadb8bd0a595b4041159 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Fri, 27 Sep 2013 16:25:48 -0500 Subject: Added trait links to correlation results Began adding the code that does the initial sort by literature correlation --- web/webqtl/correlation/CorrelationPage.py | 2 +- wqflask/base/mrna_assay_tissue_data.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 278 ++++++++++++----------- wqflask/wqflask/templates/correlation_page.html | 5 +- 4 files changed, 156 insertions(+), 131 deletions(-) (limited to 'wqflask/base') diff --git a/web/webqtl/correlation/CorrelationPage.py b/web/webqtl/correlation/CorrelationPage.py index 8c74ae0c..0c98f032 100755 --- a/web/webqtl/correlation/CorrelationPage.py +++ b/web/webqtl/correlation/CorrelationPage.py @@ -819,7 +819,7 @@ Resorting this table
#XZ, 12/12/2008: if the input geneid is 'None', return 0 #XZ, 12/12/2008: if the input geneid has no corresponding mouse geneid, return 0 def translateToMouseGeneID (self, species, geneid): - mouse_geneid = 0; + mouse_geneid = 0 #if input geneid is None, return 0. if not geneid: diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 7eb07028..be5df657 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -38,7 +38,7 @@ class MrnaAssayTissueData(object): # with highest mean value # Due to the limit size of TissueProbeSetFreezeId table in DB, # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) - print("len(gene_symbols): ", len(gene_symbols)) + #print("len(gene_symbols): ", len(gene_symbols)) if len(gene_symbols) == 0: query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index c6bc5b2a..42d5acd6 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -133,58 +133,46 @@ class CorrelationResults(object): if self.corr_type == "tissue": trait_symbol_dict = self.dataset.retrieve_gene_symbols() - trait_symbols = trait_symbol_dict.values - - tissue_corr_data = self.do_tissue_corr_for_all_traits(gene_symbol_list=trait_symbols) + tissue_corr_data = self.do_tissue_corr_for_all_traits(trait_gene_symbols = trait_symbol_dict) + #print("tissue_corr_data: ", pf(tissue_corr_data)) for trait in tissue_corr_data.keys()[:self.return_number]: - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(self.target_dataset.samplelist): - if sample in self.sample_data: - sample_value = self.sample_data[sample] - target_sample_value = self.target_dataset.trait_data[trait][index] - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( - this_trait_vals, target_vals) - - if self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.get_sample_r_and_p_values(trait = trait, target_samples = self.target_dataset.trait_data[trait]) + #this_trait_vals = [] + #target_vals = [] + #for index, sample in enumerate(self.target_dataset.samplelist): + # if sample in self.sample_data: + # sample_value = self.sample_data[sample] + # target_sample_value = self.target_dataset.trait_data[trait][index] + # this_trait_vals.append(sample_value) + # target_vals.append(target_sample_value) + # + #this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + # this_trait_vals, target_vals) + # + #if self.corr_method == 'pearson': + # sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + #else: + # sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + # + #self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + elif self.corr_type == "lit": + trait_symbol_dict = self.dataset.retrieve_gene_symbols() + elif self.corr_type == "sample": for trait, values in self.target_dataset.trait_data.iteritems(): - this_trait_vals = [] - target_vals = [] - for index, sample in enumerate(self.target_dataset.samplelist): - if sample in self.sample_data: - sample_value = self.sample_data[sample] - target_sample_value = values[index] - this_trait_vals.append(sample_value) - target_vals.append(target_sample_value) - - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( - this_trait_vals, target_vals) - - if self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] - + self.get_sample_r_and_p_values(trait = trait, target_samples = values) + self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), key=lambda t: -abs(t[1][0]))) + #print("correlation_data: ", pf(self.correlation_data)) + for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True) - print("gene symbol: ", trait_object.symbol) + #print("gene symbol: ", trait_object.symbol) trait_object.sample_r = self.correlation_data[trait][0] trait_object.sample_p = self.correlation_data[trait][1] @@ -193,17 +181,20 @@ class CorrelationResults(object): #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) - if self.corr_method != "tissue": + if self.corr_type == "tissue": + trait_object.tissue_corr = tissue_corr_data[trait][1] + trait_object.tissue_pvalue = tissue_corr_data[trait][2] + else: # Set some sane defaults trait_object.tissue_corr = 0 trait_object.tissue_pvalue = 0 - else: - trait_object.tissue_corr = tissue_corr_data[trait][1] - trait_object.tissue_pvalue = tissue_corr_data[trait][2] self.correlation_results.append(trait_object) - if self.corr_method != "tissue": + if self.corr_type != "lit": + self.do_lit_correlation_for_trait_list() + + if self.corr_type != "tissue": self.do_tissue_correlation_for_trait_list() print("self.correlation_results: ", pf(self.correlation_results)) @@ -308,36 +299,138 @@ class CorrelationResults(object): #return self.correlation_results - def do_tissue_corr_for_all_traits(self, trait_symbols, tissue_dataset_id=1): + def do_tissue_corr_for_all_traits(self, trait_gene_symbols, tissue_dataset_id=1): #Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list = [self.this_trait.symbol]) - correlation_data = {} if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=trait_symbols.values) - - print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) + symbol_list=trait_gene_symbols.values()) - for trait, symbol in trait_symbols.iteritems(): - if symbol.lower() in corr_result_tissue_vals_dict: + #print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) + + #print("trait_gene_symbols: ", pf(trait_gene_symbols)) + + tissue_corr_data = {} + for trait, symbol in trait_gene_symbols.iteritems(): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + #print("this_trait_tissue_values: ", pf(this_trait_tissue_values)) result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, self.corr_method) - correlation_results[trait] = [symbol, result[0], result[2]] + tissue_corr_data[trait] = [symbol, result[0], result[2]] - correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), - key=lambda t: -abs(t[1][1]))) + tissue_corr_data = collections.OrderedDict(sorted(tissue_corr_data.items(), + key=lambda t: -abs(t[1][1]))) + + return tissue_corr_data - return correlation_data + def do_lit_correlation_for_trait_list(self): + + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + + for trait in self.correlation_results: + + if trait.geneid: + trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) + else: + trait.mouse_gene_id = None + + if trait.mouse_gene_id and str(trait.mouse_gene_id).find(";") == -1: + result = g.db.execute( + """SELECT value + FROM LCorrRamin3 + WHERE GeneId1='%s' and + GeneId2='%s' + """ % (escape(trait.mouse_gene_id), escape(self.this_trait.geneid)) + ).fetchone() + if not result: + result = g.db.execute("""SELECT value + FROM LCorrRamin3 + WHERE GeneId2='%s' and + GeneId1='%s' + """ % (escape(trait.mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + + if result: + lit_corr = result.value + + if lit_corr: + trait.lit_corr = lit_corr + else: + trait.lit_corr = 0 + else: + trait.lit_corr = 0 + + + def convert_to_mouse_gene_id(self, species=None, gene_id=None): + """If the species is rat or human, translate the gene_id to the mouse geneid + + If there is no input gene_id or there's no corresponding mouse gene_id, return None + + """ + if not gene_id: + return None + + mouse_gene_id = None + + if species == 'mouse': + mouse_gene_id = gene_id + + elif species == 'rat': + mouse_gene_id = g.db.execute( + """SELECT mouse + FROM GeneIDXRef + WHERE rat='%d' + """, escape(int(gene_id))).fetchone().mouse + elif species == 'human': + mouse_gene_id = g.db.execute( + """SELECT mouse + FROM GeneIDXRef + WHERE human='%d' + """, escape(int(gene_id))).fetchone().mouse + + #print("mouse_geneid:", mouse_geneid) + + return mouse_gene_id + + def get_sample_r_and_p_values(self, trait, target_samples): + """Calculates the sample r (or rho) and p-value + + Given a primary trait and a target trait's sample values, + calculates either the pearson r or spearman rho and the p-value + using the corresponding scipy functions. + + """ + + this_trait_vals = [] + target_vals = [] + for index, sample in enumerate(self.target_dataset.samplelist): + if sample in self.sample_data: + sample_value = self.sample_data[sample] + target_sample_value = target_samples[index] + this_trait_vals.append(sample_value) + target_vals.append(target_sample_value) + + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) + + if self.corr_method == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + + self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + def do_tissue_corr_for_all_traits_2(self): """Comments Possibly Out of Date!!!!! @@ -508,39 +601,6 @@ class CorrelationResults(object): self.sample_data[str(sample)] = float(value) - #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid - #XZ, 12/12/2008: if the input geneid is 'None', return 0 - #XZ, 12/12/2008: if the input geneid has no corresponding mouse geneid, return 0 - def translateToMouseGeneID(self, species, geneid): - #mouse_geneid = 0 - - if not geneid: - return 0 - - #self.id, self.name, self.fullname, self.shortname = g.db.execute(""" - # SELECT Id, Name, FullName, ShortName - # FROM %s - # WHERE public > %s AND - # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') - # """ % (query_args)).fetchone() - - if species == 'mouse': - mouse_geneid = geneid - elif species == 'rat': - mouse_geneid = g.db.execute( - """SELECT mouse FROM GeneIDXRef WHERE rat='%d'""", int(geneid)).fetchone().mouse - #if record: - # mouse_geneid = record[0] - elif species == 'human': - mouse_geneid = g.db.execute( - """SELECT mouse FROM GeneIDXRef WHERE human='%d'""", int(geneid)).fetchone().mouse - #if record: - # mouse_geneid = record[0] - print("mouse_geneid:", mouse_geneid) - return mouse_geneid - - - ##XZ, 12/16/2008: the input geneid is of mouse type #def checkForLitInfo(self,geneId): @@ -751,44 +811,6 @@ class CorrelationResults(object): return litCorrDict - - def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None): - - tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE") - - q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName - q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName, input_trait_mouse_geneid) - q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName, input_trait_mouse_geneid, input_trait_mouse_geneid) - - for x in [q1,q2,q3]: - self.cursor.execute(x) - - for thisTrait in traitList: - try: - if thisTrait.geneid: - thisTrait.mouse_geneid = self.translateToMouseGeneID(species, thisTrait.geneid) - else: - thisTrait.mouse_geneid = 0 - except: - thisTrait.mouse_geneid = 0 - - if thisTrait.mouse_geneid and str(thisTrait.mouse_geneid).find(";") == -1: - try: - self.cursor.execute("SELECT value FROM %s WHERE GeneId2 = %s" % (tmpTableName, thisTrait.mouse_geneid)) - result = self.cursor.fetchone() - if result: - thisTrait.LCorr = result[0] - else: - thisTrait.LCorr = None - except: - thisTrait.LCorr = None - else: - thisTrait.LCorr = None - - self.cursor.execute("DROP TEMPORARY TABLE %s" % tmpTableName) - - return traitList - def get_traits(self, vals): #Todo: Redo cached stuff using memcached diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index 7082dbf2..4d09cf20 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -28,11 +28,13 @@ Sample r N Cases Sample p(r) + Lit Corr Tissue r Tissue p(r) {% else %} Sample rho Sample p(rho) + Lit Corr Tissue rho Tissue p(rho) {% endif %} @@ -42,7 +44,7 @@ {% for trait in correlation_results %} - {{ trait.name }} + {{ trait.name }} {{ trait.symbol }} {{ trait.alias }} {{ trait.description }} @@ -53,6 +55,7 @@ {{'%0.3f'|format(trait.sample_r)}} {{ trait.num_overlap }} {{'%0.3e'|format(trait.sample_p)}} + {{'%0.3f'|format(trait.lit_corr)}} {{'%0.3f'|format(trait.tissue_corr)}} {{'%0.3e'|format(trait.tissue_pvalue)}} -- cgit v1.2.3 From 9173f1e03f51cb141b0efa35b5e81c632b9a2689 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Tue, 8 Oct 2013 17:50:08 -0500 Subject: Literature correlation works when it is selected as the sorted correlation type (that is, when it is run again all traits in a database) Added a function to data_set.py that gets all the gene_ids in the data set. Not sure if having a separate function for getting the gene_ids and another for getting the gene symbols makes sense. --- wqflask/base/data_set.py | 15 +++++- wqflask/wqflask/correlation/show_corr_results.py | 65 +++++++++++++++++++----- 2 files changed, 67 insertions(+), 13 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 5d21c901..16f9da5d 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1078,7 +1078,20 @@ class MrnaAssayDataSet(DataSet): def retrieve_gene_symbols(self): query = """ - select ProbeSet.Name, ProbeSet.Symbol + select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId + from ProbeSet,ProbeSetXRef + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSetXRef.ProbeSetId=ProbeSet.Id; + """ % (self.id) + results = g.db.execute(query).fetchall() + symbol_dict = {} + for item in results: + symbol_dict[item[0]] = item[1] + return symbol_dict + + def retrieve_gene_ids(self): + query = """ + select ProbeSet.Name, ProbeSet.GeneId from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 42d5acd6..5df2f316 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -133,7 +133,7 @@ class CorrelationResults(object): if self.corr_type == "tissue": trait_symbol_dict = self.dataset.retrieve_gene_symbols() - tissue_corr_data = self.do_tissue_corr_for_all_traits(trait_gene_symbols = trait_symbol_dict) + tissue_corr_data = self.do_tissue_correlation_for_all_traits(trait_gene_symbols = trait_symbol_dict) #print("tissue_corr_data: ", pf(tissue_corr_data)) for trait in tissue_corr_data.keys()[:self.return_number]: @@ -158,8 +158,12 @@ class CorrelationResults(object): #self.correlation_data[trait] = [sample_r, sample_p, num_overlap] elif self.corr_type == "lit": - trait_symbol_dict = self.dataset.retrieve_gene_symbols() + trait_geneid_dict = self.dataset.retrieve_gene_ids() + lit_corr_data = self.do_lit_correlation_for_all_traits(trait_gene_ids = trait_geneid_dict) + for trait in lit_corr_data.keys()[:self.return_number]: + self.get_sample_r_and_p_values(trait = trait, target_samples = self.target_dataset.trait_data[trait]) + elif self.corr_type == "sample": for trait, values in self.target_dataset.trait_data.iteritems(): self.get_sample_r_and_p_values(trait = trait, target_samples = values) @@ -181,13 +185,15 @@ class CorrelationResults(object): #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) + # Set some sane defaults + trait_object.tissue_corr = 0 + trait_object.tissue_pvalue = 0 + trait_object.lit_corr = 0 if self.corr_type == "tissue": trait_object.tissue_corr = tissue_corr_data[trait][1] trait_object.tissue_pvalue = tissue_corr_data[trait][2] - else: - # Set some sane defaults - trait_object.tissue_corr = 0 - trait_object.tissue_pvalue = 0 + elif self.corr_type == "lit": + trait_object.lit_corr = lit_corr_data[trait][1] self.correlation_results.append(trait_object) @@ -299,7 +305,7 @@ class CorrelationResults(object): #return self.correlation_results - def do_tissue_corr_for_all_traits(self, trait_gene_symbols, tissue_dataset_id=1): + def do_tissue_correlation_for_all_traits(self, trait_gene_symbols, tissue_dataset_id=1): #Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list = [self.this_trait.symbol]) @@ -336,7 +342,7 @@ class CorrelationResults(object): def do_lit_correlation_for_trait_list(self): input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) - + for trait in self.correlation_results: if trait.geneid: @@ -350,7 +356,7 @@ class CorrelationResults(object): FROM LCorrRamin3 WHERE GeneId1='%s' and GeneId2='%s' - """ % (escape(trait.mouse_gene_id), escape(self.this_trait.geneid)) + """ % (escape(trait.mouse_gene_id), escape(input_trait_mouse_gene_id)) ).fetchone() if not result: result = g.db.execute("""SELECT value @@ -361,9 +367,7 @@ class CorrelationResults(object): ).fetchone() if result: - lit_corr = result.value - - if lit_corr: + lit_corr = result.value trait.lit_corr = lit_corr else: trait.lit_corr = 0 @@ -371,6 +375,43 @@ class CorrelationResults(object): trait.lit_corr = 0 + def do_lit_correlation_for_all_traits(self, trait_gene_ids): + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + + lit_corr_data = {} + for trait, gene_id in trait_gene_ids.iteritems(): + mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) + + if mouse_gene_id and str(mouse_gene_id).find(";") == -1: + print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) + result = g.db.execute( + """SELECT value + FROM LCorrRamin3 + WHERE GeneId1='%s' and + GeneId2='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if not result: + result = g.db.execute("""SELECT value + FROM LCorrRamin3 + WHERE GeneId2='%s' and + GeneId1='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if result: + print("result:", result) + lit_corr = result.value + lit_corr_data[trait] = [gene_id, lit_corr] + else: + lit_corr_data[trait] = [gene_id, 0] + else: + lit_corr_data[trait] = [gene_id, 0] + + lit_corr_data = collections.OrderedDict(sorted(lit_corr_data.items(), + key=lambda t: -abs(t[1][1]))) + + return lit_corr_data + def convert_to_mouse_gene_id(self, species=None, gene_id=None): """If the species is rat or human, translate the gene_id to the mouse geneid -- cgit v1.2.3 From 5daef1bd5e6e494b477797993bb72488b24dd8b3 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Wed, 9 Oct 2013 17:50:48 -0500 Subject: Improved some of the code related to the correlation page For example, changed the two functions getting gene symbols and ids for a dataset into one function that can take a column name as a parameter --- wqflask/base/data_set.py | 63 +++++++++++++------ wqflask/wqflask/correlation/show_corr_results.py | 79 ++++++++++-------------- 2 files changed, 74 insertions(+), 68 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 16f9da5d..20c9a24f 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1075,32 +1075,55 @@ class MrnaAssayDataSet(DataSet): """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() return results - - def retrieve_gene_symbols(self): - query = """ - select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId - from ProbeSet,ProbeSetXRef - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSetXRef.ProbeSetId=ProbeSet.Id; - """ % (self.id) - results = g.db.execute(query).fetchall() - symbol_dict = {} - for item in results: - symbol_dict[item[0]] = item[1] - return symbol_dict - def retrieve_gene_ids(self): + + def retrieve_genes(self, column_name): query = """ - select ProbeSet.Name, ProbeSet.GeneId + select ProbeSet.Name, ProbeSet.%s from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; - """ % (self.id) + """ % (column_name, escape(str(self.id))) results = g.db.execute(query).fetchall() - symbol_dict = {} - for item in results: - symbol_dict[item[0]] = item[1] - return symbol_dict + print("in retrieve_genes results {}: {}".format(type(results), results)) + + return dict(results) + + #return {item[0]: item[1] for item in results} + + #symbol_dict = {} + #for item in results: + # symbol_dict[item[0]] = item[1] + #return symbol_dict + + #def retrieve_gene_symbols(self): + # query = """ + # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId + # from ProbeSet,ProbeSetXRef + # where ProbeSetXRef.ProbeSetFreezeId = %s and + # ProbeSetXRef.ProbeSetId=ProbeSet.Id; + # """ % (self.id) + # results = g.db.execute(query).fetchall() + # symbol_dict = {} + # for item in results: + # symbol_dict[item[0]] = item[1] + # return symbol_dict + # + #def retrieve_gene_ids(self): + # query = """ + # select ProbeSet.Name, ProbeSet.GeneId + # from ProbeSet,ProbeSetXRef + # where ProbeSetXRef.ProbeSetFreezeId = %s and + # ProbeSetXRef.ProbeSetId=ProbeSet.Id; + # """ % (self.id) + # return process_and_run_query(query) + # results = g.db.execute(query).fetchall() + # symbol_dict = {} + # for item in results: + # symbol_dict[item[0]] = item[1] + # return symbol_dict + + class TempDataSet(DataSet): diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 5df2f316..258dcfa4 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -93,9 +93,6 @@ class CorrelationResults(object): # get trait list from db (database name) # calculate correlation with Base vector and targets - #self.this_trait = GeneralTrait(dataset=self.dataset.name, - # name=start_vars['trait_id'], - # cellid=None) with Bench("Doing correlations"): helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() @@ -114,9 +111,9 @@ class CorrelationResults(object): self.dataset.group.f1list + self.dataset.group.samplelist) - #If either BXD/whatever Only or All Samples, append all of that group's samplelist + #If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': - self.process_samples(start_vars, primary_samples, ()) + self.process_samples(start_vars, primary_samples) #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and #exclude the primary samples (because they would have been added in the previous @@ -132,55 +129,36 @@ class CorrelationResults(object): self.correlation_data = {} if self.corr_type == "tissue": - trait_symbol_dict = self.dataset.retrieve_gene_symbols() - tissue_corr_data = self.do_tissue_correlation_for_all_traits(trait_gene_symbols = trait_symbol_dict) - #print("tissue_corr_data: ", pf(tissue_corr_data)) + self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol") + tissue_corr_data = self.do_tissue_correlation_for_all_traits() for trait in tissue_corr_data.keys()[:self.return_number]: - self.get_sample_r_and_p_values(trait = trait, target_samples = self.target_dataset.trait_data[trait]) - #this_trait_vals = [] - #target_vals = [] - #for index, sample in enumerate(self.target_dataset.samplelist): - # if sample in self.sample_data: - # sample_value = self.sample_data[sample] - # target_sample_value = self.target_dataset.trait_data[trait][index] - # this_trait_vals.append(sample_value) - # target_vals.append(target_sample_value) - # - #this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( - # this_trait_vals, target_vals) - # - #if self.corr_method == 'pearson': - # sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals) - #else: - # sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - # - #self.correlation_data[trait] = [sample_r, sample_p, num_overlap] - + self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + elif self.corr_type == "lit": - trait_geneid_dict = self.dataset.retrieve_gene_ids() - lit_corr_data = self.do_lit_correlation_for_all_traits(trait_gene_ids = trait_geneid_dict) + self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId") + lit_corr_data = self.do_lit_correlation_for_all_traits() for trait in lit_corr_data.keys()[:self.return_number]: - self.get_sample_r_and_p_values(trait = trait, target_samples = self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": for trait, values in self.target_dataset.trait_data.iteritems(): - self.get_sample_r_and_p_values(trait = trait, target_samples = values) + self.get_sample_r_and_p_values(trait, values) self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), key=lambda t: -abs(t[1][0]))) - #print("correlation_data: ", pf(self.correlation_data)) for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True) - - #print("gene symbol: ", trait_object.symbol) - trait_object.sample_r = self.correlation_data[trait][0] - trait_object.sample_p = self.correlation_data[trait][1] - trait_object.num_overlap = self.correlation_data[trait][2] + (trait_object.sample_r, + trait_object.sample_p, + trait_object.num_overlap) = self.correlation_data[trait] + + #trait_object.sample_p = self.correlation_data[trait][1] + #trait_object.num_overlap = self.correlation_data[trait][2] #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef, #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions) @@ -194,7 +172,6 @@ class CorrelationResults(object): trait_object.tissue_pvalue = tissue_corr_data[trait][2] elif self.corr_type == "lit": trait_object.lit_corr = lit_corr_data[trait][1] - self.correlation_results.append(trait_object) if self.corr_type != "lit": @@ -305,7 +282,7 @@ class CorrelationResults(object): #return self.correlation_results - def do_tissue_correlation_for_all_traits(self, trait_gene_symbols, tissue_dataset_id=1): + def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1): #Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list = [self.this_trait.symbol]) @@ -315,14 +292,14 @@ class CorrelationResults(object): #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=trait_gene_symbols.values()) + symbol_list=self.trait_symbol_dict.values()) #print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) #print("trait_gene_symbols: ", pf(trait_gene_symbols)) tissue_corr_data = {} - for trait, symbol in trait_gene_symbols.iteritems(): + for trait, symbol in self.trait_symbol_dict.iteritems(): if symbol and symbol.lower() in corr_result_tissue_vals_dict: this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] #print("this_trait_tissue_values: ", pf(this_trait_tissue_values)) @@ -375,15 +352,15 @@ class CorrelationResults(object): trait.lit_corr = 0 - def do_lit_correlation_for_all_traits(self, trait_gene_ids): + def do_lit_correlation_for_all_traits(self): input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} - for trait, gene_id in trait_gene_ids.iteritems(): + for trait, gene_id in self.trait_geneid_dict.iteritems(): mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: - print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) + #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) result = g.db.execute( """SELECT value FROM LCorrRamin3 @@ -399,7 +376,7 @@ class CorrelationResults(object): """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) ).fetchone() if result: - print("result:", result) + #print("result:", result) lit_corr = result.value lit_corr_data[trait] = [gene_id, lit_corr] else: @@ -458,7 +435,9 @@ class CorrelationResults(object): for index, sample in enumerate(self.target_dataset.samplelist): if sample in self.sample_data: sample_value = self.sample_data[sample] + print("sample_value:", sample_value) target_sample_value = target_samples[index] + print("target_sample_value:", target_sample_value) this_trait_vals.append(sample_value) target_vals.append(target_sample_value) @@ -470,7 +449,8 @@ class CorrelationResults(object): else: sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + def do_tissue_corr_for_all_traits_2(self): @@ -632,7 +612,10 @@ class CorrelationResults(object): ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id """ - def process_samples(self, start_vars, sample_names, excluded_samples): + def process_samples(self, start_vars, sample_names, excluded_samples=None): + if not excluded_samples: + excluded_samples = () + for sample in sample_names: if sample not in excluded_samples: value = start_vars['value:' + sample] -- cgit v1.2.3 From 8a09358e98dbf88deb101d13107a40bac371de5c Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 10 Oct 2013 17:09:21 -0500 Subject: Almost have correlation working for non-primary strain/group There's just some issue with parents/f1s not being included if you select non-BXD (or whatever the group is). All Samples, however does work. --- wqflask/base/data_set.py | 17 +++++++---------- wqflask/wqflask/correlation/show_corr_results.py | 19 +++++++++---------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 20c9a24f..beb62bd7 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -436,8 +436,13 @@ class DataSet(object): print("Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - def get_trait_data(self): - self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + def get_trait_data(self, sample_list=None): + if sample_list: + self.samplelist = sample_list + self.group.parlist + self.group.f1list + else: + self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + + query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} @@ -1085,16 +1090,8 @@ class MrnaAssayDataSet(DataSet): ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) results = g.db.execute(query).fetchall() - print("in retrieve_genes results {}: {}".format(type(results), results)) return dict(results) - - #return {item[0]: item[1] for item in results} - - #symbol_dict = {} - #for item in results: - # symbol_dict[item[0]] = item[1] - #return symbol_dict #def retrieve_gene_symbols(self): # query = """ diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 258dcfa4..a5c80674 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -70,7 +70,7 @@ TISSUE_MOUSE_DB = 1 def print_mem(stage=""): mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - print("{}: {}".format(stage, mem/1024)) + #print("{}: {}".format(stage, mem/1024)) class AuthException(Exception): @@ -119,10 +119,14 @@ class CorrelationResults(object): #exclude the primary samples (because they would have been added in the previous #if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': + if corr_samples_group == 'samples_other': + primary_samples = [x for x in primary_samples if x not in ( + self.dataset.group.parlist + self.dataset.group.f1list)] + print("primary_samples:", primary_samples) self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) - self.target_dataset.get_trait_data() + self.target_dataset.get_trait_data(self.sample_data.keys()) self.correlation_results = [] @@ -180,7 +184,7 @@ class CorrelationResults(object): if self.corr_type != "tissue": self.do_tissue_correlation_for_trait_list() - print("self.correlation_results: ", pf(self.correlation_results)) + #print("self.correlation_results: ", pf(self.correlation_results)) #XZ, 09/18/2008: get all information about the user selected database. @@ -241,8 +245,6 @@ class CorrelationResults(object): #Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list = [self.this_trait.symbol]) - - print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict)) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] @@ -258,8 +260,6 @@ class CorrelationResults(object): corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=gene_symbol_list) - print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) - for trait in self.correlation_results: if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] @@ -430,14 +430,14 @@ class CorrelationResults(object): """ + print("len(self.sample_data):", len(self.sample_data)) + this_trait_vals = [] target_vals = [] for index, sample in enumerate(self.target_dataset.samplelist): if sample in self.sample_data: sample_value = self.sample_data[sample] - print("sample_value:", sample_value) target_sample_value = target_samples[index] - print("target_sample_value:", target_sample_value) this_trait_vals.append(sample_value) target_vals.append(target_sample_value) @@ -995,7 +995,6 @@ class CorrelationResults(object): values_2.append(target_value) correlation = calCorrelation(values_1, values_2) self.correlation_data[trait] = correlation - print ('correlation result: %s %s' % (trait, correlation)) """ correlations = [] -- cgit v1.2.3