diff options
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x | wqflask/base/data_set.py | 51 | ||||
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 146 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 5 |
3 files changed, 195 insertions, 7 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 96e04df0..beb62bd7 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -436,8 +436,13 @@ class DataSet(object): print("Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - def get_trait_data(self): - self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + def get_trait_data(self, sample_list=None): + if sample_list: + self.samplelist = sample_list + self.group.parlist + self.group.f1list + else: + self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + + query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} @@ -1055,7 +1060,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() return results[0] - def retrieve_sample_data(self, trait): query = """ @@ -1076,6 +1080,47 @@ class MrnaAssayDataSet(DataSet): """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() return results + + + def retrieve_genes(self, column_name): + query = """ + select ProbeSet.Name, ProbeSet.%s + from ProbeSet,ProbeSetXRef + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSetXRef.ProbeSetId=ProbeSet.Id; + """ % (column_name, escape(str(self.id))) + results = g.db.execute(query).fetchall() + + return dict(results) + + #def retrieve_gene_symbols(self): + # query = """ + # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId + # from ProbeSet,ProbeSetXRef + # where ProbeSetXRef.ProbeSetFreezeId = %s and + # ProbeSetXRef.ProbeSetId=ProbeSet.Id; + # """ % (self.id) + # results = g.db.execute(query).fetchall() + # symbol_dict = {} + # for item in results: + # symbol_dict[item[0]] = item[1] + # return symbol_dict + # + #def retrieve_gene_ids(self): + # query = """ + # select ProbeSet.Name, ProbeSet.GeneId + # from ProbeSet,ProbeSetXRef + # where ProbeSetXRef.ProbeSetFreezeId = %s and + # ProbeSetXRef.ProbeSetId=ProbeSet.Id; + # """ % (self.id) + # return process_and_run_query(query) + # results = g.db.execute(query).fetchall() + # symbol_dict = {} + # for item in results: + # symbol_dict[item[0]] = item[1] + # return symbol_dict + + class TempDataSet(DataSet): diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py new file mode 100644 index 00000000..be5df657 --- /dev/null +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -0,0 +1,146 @@ +from __future__ import absolute_import, print_function, division + +import collections + +from flask import g + +from utility import db_tools +from utility import Bunch + +from MySQLdb import escape_string as escape + +from pprint import pformat as pf + +class MrnaAssayTissueData(object): + + def __init__(self, gene_symbols=None): + self.gene_symbols = gene_symbols + self.have_data = False + if self.gene_symbols == None: + self.gene_symbols = [] + + self.data = collections.defaultdict(Bunch) + + #self.gene_id_dict ={} + #self.data_id_dict = {} + #self.chr_dict = {} + #self.mb_dict = {} + #self.desc_dict = {} + #self.probe_target_desc_dict = {} + + query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description + from ( + select Symbol, max(Mean) as maxmean + from TissueProbeSetXRef + where TissueProbeSetFreezeId=1 and ''' + + # Note that inner join is necessary in this query to get distinct record in one symbol group + # with highest mean value + # Due to the limit size of TissueProbeSetFreezeId table in DB, + # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) + #print("len(gene_symbols): ", len(gene_symbols)) + if len(gene_symbols) == 0: + query += '''Symbol!='' and Symbol Is Not Null group by Symbol) + as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol + and t.Mean = x.maxmean; + ''' + else: + in_clause = db_tools.create_in_clause(gene_symbols) + + query += ''' Symbol in {} group by Symbol) + as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol + and t.Mean = x.maxmean; + '''.format(in_clause) + + results = g.db.execute(query).fetchall() + for result in results: + symbol = result[0] + if symbol in gene_symbols: + #gene_symbols.append(symbol) + symbol = symbol.lower() + + self.data[symbol].gene_id = result.GeneId + self.data[symbol].data_id = result.DataId + self.data[symbol].chr = result.Chr + self.data[symbol].mb = result.Mb + self.data[symbol].description = result.description + self.data[symbol].probe_target_description = result.Probe_Target_Description + + #print("self.data: ", pf(self.data)) + + ########################################################################### + #Input: cursor, symbolList (list), dataIdDict(Dict) + #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, + # key is symbol, value is one list of expression values of one probeSet; + #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). + #Attention! All keys are lower case! + ########################################################################### + + def get_symbol_values_pairs(self): + id_list = [self.data[symbol].data_id for symbol in self.data] + + symbol_values_dict = {} + + query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value + FROM TissueProbeSetXRef, TissueProbeSetData + WHERE TissueProbeSetData.Id IN {} and + TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + + results = g.db.execute(query).fetchall() + for result in results: + if result.Symbol.lower() not in symbol_values_dict: + symbol_values_dict[result.Symbol.lower()] = [result.value] + else: + symbol_values_dict[result.Symbol.lower()].append(result.value) + + #for symbol in self.data: + # data_id = self.data[symbol].data_id + # symbol_values_dict[symbol] = self.get_tissue_values(data_id) + + + return symbol_values_dict + + + #def get_tissue_values(self, data_id): + # """Gets the tissue values for a particular gene""" + # + # tissue_values=[] + # + # query = """SELECT value, id + # FROM TissueProbeSetData + # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id)) + # + # #try : + # results = g.db.execute(query).fetchall() + # for result in results: + # tissue_values.append(result.value) + # #symbol_values_dict[symbol] = value_list + # #except: + # # symbol_values_pairs[symbol] = None + # + # return tissue_values + +######################################################################################################## +#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# key is symbol, value is one list of expression values of one probeSet. +#function: wrapper function for getSymbolValuePairDict function +# build gene symbol list if necessary, cut it into small lists if necessary, +# then call getSymbolValuePairDict function and merge the results. +######################################################################################################## + +#def get_trait_symbol_and_tissue_values(symbol_list=None): +# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) +# +# #symbolList, +# #geneIdDict, +# #dataIdDict, +# #ChrDict, +# #MbDict, +# #descDict, +# #pTargetDescDict = getTissueProbeSetXRefInfo( +# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) +# +# if len(tissue_data.gene_symbols): +# return get_symbol_values_pairs(tissue_data) + diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index c893c887..6a64eeaf 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -286,7 +286,6 @@ class GeneralTrait(object): escape(self.dataset.name), escape(self.name)) trait_info = g.db.execute(query).fetchone() - #print("trait_info is: ", pf(trait_info)) #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif self.dataset.type == 'Geno': @@ -359,7 +358,6 @@ class GeneralTrait(object): InbredSet.SpeciesId = Species.Id AND Species.TaxonomyId = Homologene.TaxonomyId """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) - print("-> query is:", query) result = g.db.execute(query).fetchone() #else: # result = None @@ -391,7 +389,6 @@ class GeneralTrait(object): Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(self.dataset.group.species, self.locus) - print("query is:", query) result = g.db.execute(query).fetchone() self.locus_chr = result[0] self.locus_mb = result[1] @@ -603,4 +600,4 @@ class GeneralTrait(object): else: ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation)) ZValue = ZValue*sqrt(self.overlap-3) - self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
\ No newline at end of file + self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) |