about | summary | refs | log | tree | commit | diff
path: root/wqflask/base
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base')
-rw-r--r-- wqflask/base/mrna_assay_tissue_data.py 134
-rwxr-xr-x wqflask/base/trait.py 5
2 files changed, 135 insertions, 4 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
new file mode 100644
index 00000000..8ae71858
--- /dev/null
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, print_function, division
+
+import collections
+
+from flask import g
+
+from utility import dbtools
+from uitility import Bunch
+
+from MySQLdb import escape_string as escape
+
+class MrnaAssayTissueData(object):
+    """Tissue mRNA assay records and expression values for gene symbols.
+
+    Construction queries TissueProbeSetXRef (TissueProbeSetFreezeId=1) and
+    keeps, per symbol, the record(s) whose Mean equals the symbol's maximum.
+
+    Attributes:
+        gene_symbols: the requested symbols followed by every symbol the
+            query returned that was not already listed.
+        have_data: initialised to False; nothing in this class updates it.
+        data: defaultdict(Bunch) keyed by LOWER-CASE symbol; each entry
+            carries gene_id, data_id, chr, mb, description and
+            probe_target_description.
+    """
+
+    # NOTE(review): the module header imports Bunch with
+    # `from uitility import Bunch`, which looks like a typo for `utility`
+    # (compare `from utility import dbtools`) -- confirm and fix it there.
+
+    def __init__(self, gene_symbols=None):
+        """Load the cross-reference record of every requested symbol.
+
+        Args:
+            gene_symbols: iterable of gene symbols to look up.  None or an
+                empty iterable selects every non-empty symbol.  The input
+                is copied, so the caller's list is never mutated.
+        """
+        self.gene_symbols = list(gene_symbols) if gene_symbols else []
+        self.have_data = False
+        self.data = collections.defaultdict(Bunch)
+
+        if self.gene_symbols:
+            # create_in_clause presumably returns a parenthesised SQL list;
+            # TODO confirm that it also escapes the symbols.
+            symbol_filter = "Symbol in {}".format(
+                dbtools.create_in_clause(self.gene_symbols))
+        else:
+            symbol_filter = "Symbol!='' and Symbol Is Not Null"
+
+        # The inner join is needed to get, within each symbol group, the
+        # record with the highest mean value.  Per the original author the
+        # table is small enough for the join to perform acceptably.
+        query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb,
+                          t.description, t.Probe_Target_Description
+                   from (
+                       select Symbol, max(Mean) as maxmean
+                       from TissueProbeSetXRef
+                       where TissueProbeSetFreezeId=1 and {}
+                       group by Symbol)
+                   as x inner join TissueProbeSetXRef as t
+                   on t.Symbol = x.Symbol and t.Mean = x.maxmean;
+                '''.format(symbol_filter)
+
+        known_symbols = set(self.gene_symbols)
+        for result in g.db.execute(query).fetchall():
+            symbol = result[0]
+            if symbol not in known_symbols:
+                known_symbols.add(symbol)
+                self.gene_symbols.append(symbol)
+
+            # Every key of self.data is lower case.
+            entry = self.data[symbol.lower()]
+            entry.gene_id = result.GeneId
+            entry.data_id = result.DataId
+            entry.chr = result.Chr
+            entry.mb = result.Mb
+            entry.description = result.description
+            entry.probe_target_description = result.Probe_Target_Description
+
+    def get_symbol_value_pairs(self):
+        """Return the tissue expression values of every loaded symbol.
+
+        Returns:
+            dict mapping each LOWER-CASE symbol of self.data to the list of
+            TissueProbeSetData values stored under that symbol's data_id.
+            A symbol without rows maps to an empty list.  If the query
+            fails, every symbol maps to None (best effort, no exception).
+        """
+        symbol_value_pairs = {}
+        if not self.data:
+            # Nothing was loaded; this also avoids an empty IN clause.
+            return symbol_value_pairs
+
+        # Keep a list per data id in case several symbols share one id.
+        symbols_by_id = collections.defaultdict(list)
+        for symbol, entry in self.data.items():
+            symbols_by_id[entry.data_id].append(symbol)
+            symbol_value_pairs[symbol] = []
+
+        query = """SELECT value, id
+                   FROM TissueProbeSetData
+                   WHERE Id IN {}""".format(
+                       dbtools.create_in_clause(list(symbols_by_id)))
+
+        try:
+            results = g.db.execute(query).fetchall()
+        except Exception:
+            # Deliberately best effort: report the failure as None values
+            # instead of raising.
+            return dict.fromkeys(symbol_value_pairs)
+
+        for result in results:
+            # NOTE(review): assumes result.id compares equal to the DataId
+            # read from TissueProbeSetXRef -- confirm the column types.
+            for symbol in symbols_by_id.get(result.id, ()):
+                symbol_value_pairs[symbol].append(result.value)
+        return symbol_value_pairs
+
+    @staticmethod
+    def get_trait_symbol_and_tissue_values(symbol_list=None):
+        """Build the tissue data for *symbol_list* and return its values.
+
+        Declared static because it takes no instance: it creates its own.
+
+        Returns:
+            The dict produced by get_symbol_value_pairs(), or None when no
+            gene symbol is available.
+        """
+        tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+        if tissue_data.gene_symbols:
+            return tissue_data.get_symbol_value_pairs()
+        return None
+
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index c893c887..6a64eeaf 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -286,7 +286,6 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
trait_info = g.db.execute(query).fetchone()
- #print("trait_info is: ", pf(trait_info))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.dataset.type == 'Geno':
@@ -359,7 +358,6 @@ class GeneralTrait(object):
InbredSet.SpeciesId = Species.Id AND
Species.TaxonomyId = Homologene.TaxonomyId
""" % (escape(str(self.geneid)), escape(self.dataset.group.name))
- print("-> query is:", query)
result = g.db.execute(query).fetchone()
#else:
# result = None
@@ -391,7 +389,6 @@ class GeneralTrait(object):
Geno.Name = '{}' and
Geno.SpeciesId = Species.Id
""".format(self.dataset.group.species, self.locus)
- print("query is:", query)
result = g.db.execute(query).fetchone()
self.locus_chr = result[0]
self.locus_mb = result[1]
@@ -603,4 +600,4 @@ class GeneralTrait(object):
else:
ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
ZValue = ZValue*sqrt(self.overlap-3)
- self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) \ No newline at end of file
+ self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))