diff options
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x | wqflask/base/data_set.py | 51 | ||||
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 148 |
2 files changed, 128 insertions, 71 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index befbd518..9fa7beb3 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -438,8 +438,13 @@ class DataSet(object): print("Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - def get_trait_data(self): - self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + def get_trait_data(self, sample_list=None): + if sample_list: + self.samplelist = sample_list + self.group.parlist + self.group.f1list + else: + self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + + query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} @@ -1057,7 +1062,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() return results[0] - def retrieve_sample_data(self, trait): query = """ @@ -1078,6 +1082,47 @@ class MrnaAssayDataSet(DataSet): """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() return results + + + def retrieve_genes(self, column_name): + query = """ + select ProbeSet.Name, ProbeSet.%s + from ProbeSet,ProbeSetXRef + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSetXRef.ProbeSetId=ProbeSet.Id; + """ % (column_name, escape(str(self.id))) + results = g.db.execute(query).fetchall() + + return dict(results) + + #def retrieve_gene_symbols(self): + # query = """ + # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId + # from ProbeSet,ProbeSetXRef + # where ProbeSetXRef.ProbeSetFreezeId = %s and + # ProbeSetXRef.ProbeSetId=ProbeSet.Id; + # """ % (self.id) + # results = g.db.execute(query).fetchall() + # symbol_dict = {} + # for item in results: + # symbol_dict[item[0]] = item[1] + # return symbol_dict + # + #def retrieve_gene_ids(self): + # query = """ + # select ProbeSet.Name, ProbeSet.GeneId + # from ProbeSet,ProbeSetXRef + # where ProbeSetXRef.ProbeSetFreezeId = %s and + # ProbeSetXRef.ProbeSetId=ProbeSet.Id; + # """ % (self.id) + # return process_and_run_query(query) + # results = g.db.execute(query).fetchall() + # symbol_dict = {} + # for item in results: + # symbol_dict[item[0]] = item[1] + # return symbol_dict + + class TempDataSet(DataSet): diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index a08f3f21..be5df657 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -9,6 +9,8 @@ from utility import Bunch from MySQLdb import escape_string as escape +from pprint import pformat as pf + class MrnaAssayTissueData(object): def __init__(self, gene_symbols=None): @@ -35,14 +37,15 @@ class MrnaAssayTissueData(object): # Note that inner join is necessary in this query to get distinct record in one symbol group # with highest mean value # Due to the limit size of TissueProbeSetFreezeId table in DB, - # performance of inner join is acceptable. + # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) + #print("len(gene_symbols): ", len(gene_symbols)) if len(gene_symbols) == 0: query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; ''' else: - in_clause = dbtools.create_in_clause(gene_symbols) + in_clause = db_tools.create_in_clause(gene_symbols) query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol @@ -51,17 +54,19 @@ class MrnaAssayTissueData(object): results = g.db.execute(query).fetchall() for result in results: - symbol = item[0] - gene_symbols.append(symbol) - symbol = symbol.lower() - - self.data[symbol].gene_id = result.GeneId - self.data[symbol].data_id = result.DataId - self.data[symbol].chr = result.Chr - self.data[symbol].mb = result.Mb - self.data[symbol].description = result.description - self.data[symbol].probe_target_description = result.Probe_Target_Description + symbol = result[0] + if symbol in gene_symbols: + #gene_symbols.append(symbol) + symbol = symbol.lower() + + self.data[symbol].gene_id = result.GeneId + self.data[symbol].data_id = result.DataId + self.data[symbol].chr = result.Chr + self.data[symbol].mb = result.Mb + self.data[symbol].description = result.description + self.data[symbol].probe_target_description = result.Probe_Target_Description + #print("self.data: ", pf(self.data)) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -70,65 +75,72 @@ class MrnaAssayTissueData(object): #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). #Attention! All keys are lower case! ########################################################################### - def get_symbol_value_pairs(self): - - id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data] - - symbol_value_pairs = {} - value_list=[] - query = """SELECT value, id - FROM TissueProbeSetData - WHERE Id IN {}""".format(create_in_clause(id_list)) - - try : - results = g.db.execute(query).fetchall() - for result in results: - value_list.append(result.value) - symbol_value_pairs[symbol] = value_list - except: - symbol_value_pairs[symbol] = None - - #for symbol in symbol_list: - # if tissue_data.has_key(symbol): - # data_id = tissue_data[symbol].data_id - # - # query = """select value, id - # from TissueProbeSetData - # where Id={}""".format(escape(data_id)) - # try : - # results = g.db.execute(query).fetchall() - # for item in results: - # item = item[0] - # value_list.append(item) - # symbol_value_pairs[symbol] = value_list - # value_list=[] - # except: - # symbol_value_pairs[symbol] = None + def get_symbol_values_pairs(self): + id_list = [self.data[symbol].data_id for symbol in self.data] + + symbol_values_dict = {} + + query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value + FROM TissueProbeSetXRef, TissueProbeSetData + WHERE TissueProbeSetData.Id IN {} and + TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + + results = g.db.execute(query).fetchall() + for result in results: + if result.Symbol.lower() not in symbol_values_dict: + symbol_values_dict[result.Symbol.lower()] = [result.value] + else: + symbol_values_dict[result.Symbol.lower()].append(result.value) + + #for symbol in self.data: + # data_id = self.data[symbol].data_id + # symbol_values_dict[symbol] = self.get_tissue_values(data_id) + - return symbol_value_pairs + return symbol_values_dict - ######################################################################################################## - #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol - #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. - # key is symbol, value is one list of expression values of one probeSet. - #function: wrapper function for getSymbolValuePairDict function - # build gene symbol list if necessary, cut it into small lists if necessary, - # then call getSymbolValuePairDict function and merge the results. - ######################################################################################################## - def get_trait_symbol_and_tissue_values(symbol_list=None): - tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) + #def get_tissue_values(self, data_id): + # """Gets the tissue values for a particular gene""" + # + # tissue_values=[] + # + # query = """SELECT value, id + # FROM TissueProbeSetData + # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id)) + # + # #try : + # results = g.db.execute(query).fetchall() + # for result in results: + # tissue_values.append(result.value) + # #symbol_values_dict[symbol] = value_list + # #except: + # # symbol_values_pairs[symbol] = None + # + # return tissue_values - #symbolList, - #geneIdDict, - #dataIdDict, - #ChrDict, - #MbDict, - #descDict, - #pTargetDescDict = getTissueProbeSetXRefInfo( - # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) - - if len(tissue_data.gene_symbols): - return get_symbol_value_pairs(tissue_data) +######################################################################################################## +#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# key is symbol, value is one list of expression values of one probeSet. +#function: wrapper function for getSymbolValuePairDict function +# build gene symbol list if necessary, cut it into small lists if necessary, +# then call getSymbolValuePairDict function and merge the results. +######################################################################################################## + +#def get_trait_symbol_and_tissue_values(symbol_list=None): +# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) +# +# #symbolList, +# #geneIdDict, +# #dataIdDict, +# #ChrDict, +# #MbDict, +# #descDict, +# #pTargetDescDict = getTissueProbeSetXRefInfo( +# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) +# +# if len(tissue_data.gene_symbols): +# return get_symbol_values_pairs(tissue_data) |