Merge /home/lei/gene

author: Zachary Sloan 2013-09-13 14:30:20 -0500
committer: Zachary Sloan 2013-09-13 14:30:20 -0500
commit: 261c7852ceaecf2034923ef1c4ec1481db786edd (patch)
tree: a0c9c9da9849f8693e746f6d80a715018e0c0da6 /wqflask/base
parent: 20be011f8b33fcde94037af19e403d3b76d5c9d1 (diff)
parent: af24c0d610d9a2189f86677e4f23deb372ee2bf7 (diff)
download: genenetwork2-261c7852ceaecf2034923ef1c4ec1481db786edd.tar.gz
2 files changed, 135 insertions, 4 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
new file mode 100644
index 00000000..8ae71858
--- /dev/null
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, print_function, division
+
+import collections
+
+from flask import g
+
+from utility import dbtools
+from utility import Bunch
+
+from MySQLdb import escape_string as escape
+
+class MrnaAssayTissueData(object):
+    """Tissue mRNA-assay probe-set records keyed by lower-cased gene symbol.
+
+    On construction this queries TissueProbeSetXRef (TissueProbeSetFreezeId=1)
+    for the highest-Mean record of each symbol group and stores its fields in
+    ``self.data[symbol.lower()]``.
+
+    Attributes:
+        gene_symbols: symbols, in their original case, that the query returned.
+        have_data: True when the query returned at least one row.
+        data: defaultdict of Bunch objects carrying gene_id, data_id, chr, mb,
+            description and probe_target_description.
+    """
+
+    def __init__(self, gene_symbols=None):
+        """Load the probe-set records for ``gene_symbols``.
+
+        Args:
+            gene_symbols: gene symbols to look up; None or an empty sequence
+                selects every non-empty symbol. The argument is not modified.
+        """
+        # Work on a private copy: None used to crash len() below, and a list
+        # supplied by the caller used to be appended to in place.
+        requested = [] if gene_symbols is None else list(gene_symbols)
+        self.gene_symbols = []
+        self.have_data = False
+        self.data = collections.defaultdict(Bunch)
+
+        # Inner select: highest Mean per symbol. Outer select: the matching row.
+        query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description
+                        from (
+                        select Symbol, max(Mean) as maxmean
+                        from TissueProbeSetXRef
+                        where TissueProbeSetFreezeId=1 and '''
+
+        # The inner join is needed to get, within each symbol group, the one
+        # record with the highest mean value. Given the limited size of the
+        # table, the performance of the inner join is acceptable.
+        if not requested:
+            query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
+                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+                and t.Mean = x.maxmean;  
+                    '''
+        else:
+            # NOTE(review): the symbols end up inside the SQL text; confirm
+            # that dbtools.create_in_clause escapes them.
+            in_clause = dbtools.create_in_clause(requested)
+
+            query += ''' Symbol in {} group by Symbol)
+                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+                and t.Mean = x.maxmean;
+                    '''.format(in_clause)
+
+        # Column 0 is t.Symbol; the remaining columns are read by name.
+        for result in g.db.execute(query).fetchall():
+            symbol = result[0]
+            self.gene_symbols.append(symbol)
+
+            record = self.data[symbol.lower()]
+            record.gene_id = result.GeneId
+            record.data_id = result.DataId
+            record.chr = result.Chr
+            record.mb = result.Mb
+            record.description = result.description
+            record.probe_target_description = result.Probe_Target_Description
+
+        # have_data used to stay False even after rows had been loaded.
+        self.have_data = bool(self.gene_symbols)
+
+    def get_symbol_value_pairs(self):
+        """Return the tissue expression values of every loaded symbol.
+
+        Symbols whose data id has no row in TissueProbeSetData map to an
+        empty list.
+
+        Returns:
+            dict mapping each lower-cased symbol in ``self.data`` to the list
+            of TissueProbeSetData values stored under its data_id. The dict
+            is empty when no record was loaded.
+        """
+        # The query returns rows by data id, so remember which symbol owns
+        # each id.
+        symbol_by_id = {self.data[symbol].data_id: symbol
+                        for symbol in self.data}
+        if not symbol_by_id:
+            # "IN ()" is not valid SQL, and there is nothing to fetch anyway.
+            return {}
+
+        symbol_value_pairs = {symbol: [] for symbol in self.data}
+
+        # One bulk query for all ids rather than one query per symbol.
+        query = """SELECT value, id
+                   FROM TissueProbeSetData
+                   WHERE Id IN {}""".format(
+                       dbtools.create_in_clause(list(symbol_by_id)))
+
+        # Database errors propagate to the caller; the previous bare except
+        # hid them (and itself failed on an undefined name).
+        for value, data_id in g.db.execute(query).fetchall():
+            # NOTE(review): assumes TissueProbeSetData.Id compares equal to
+            # the DataId read in __init__ -- confirm the column types match.
+            symbol_value_pairs[symbol_by_id[data_id]].append(value)
+
+        return symbol_value_pairs
+
+    # Declared as a staticmethod: it takes no instance, so as a plain method
+    # it could not be called with a symbol list.
+    @staticmethod
+    def get_trait_symbol_and_tissue_values(symbol_list=None):
+        """Build a MrnaAssayTissueData and return its symbol/value pairs.
+
+        Args:
+            symbol_list: gene symbols to look up; None selects all symbols.
+
+        Returns:
+            The dict produced by get_symbol_value_pairs(), or None when the
+            query returned no symbol.
+        """
+        tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+        if not tissue_data.gene_symbols:
+            return None
+        return tissue_data.get_symbol_value_pairs()
+            
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index c893c887..6a64eeaf 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -286,7 +286,6 @@ class GeneralTrait(object):
                            escape(self.dataset.name),
                            escape(self.name))
             trait_info = g.db.execute(query).fetchone()
-            #print("trait_info is: ", pf(trait_info))
         #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
         # to avoid the problem of same marker name from different species.
         elif self.dataset.type == 'Geno':
@@ -359,7 +358,6 @@ class GeneralTrait(object):
                                 InbredSet.SpeciesId = Species.Id AND
                                 Species.TaxonomyId = Homologene.TaxonomyId
                         """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
-                print("-> query is:", query)
                 result = g.db.execute(query).fetchone()
                 #else:
                 #    result = None
@@ -391,7 +389,6 @@ class GeneralTrait(object):
                                 Geno.Name = '{}' and
                                 Geno.SpeciesId = Species.Id
                                 """.format(self.dataset.group.species, self.locus)
-                            print("query is:", query)
                             result = g.db.execute(query).fetchone()
                             self.locus_chr = result[0]
                             self.locus_mb = result[1]
@@ -603,4 +600,4 @@ class GeneralTrait(object):
             else:
                 ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
                 ZValue = ZValue*sqrt(self.overlap-3)
-                self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
\ No newline at end of file
+                self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
author	Zachary Sloan	2013-09-13 14:30:20 -0500
committer	Zachary Sloan	2013-09-13 14:30:20 -0500
commit	261c7852ceaecf2034923ef1c4ec1481db786edd (patch)
tree	a0c9c9da9849f8693e746f6d80a715018e0c0da6 /wqflask/base
parent	20be011f8b33fcde94037af19e403d3b76d5c9d1 (diff)
parent	af24c0d610d9a2189f86677e4f23deb372ee2bf7 (diff)
download	genenetwork2-261c7852ceaecf2034923ef1c4ec1481db786edd.tar.gz