about summary refs log tree commit diff
diff options
context:
space:
mode:
author Lei Yan 2013-09-20 17:20:52 -0500
committer Lei Yan 2013-09-20 17:20:52 -0500
commit 183f9a0ba19b6fcdf1475285af1bb1fcd45a9442 (patch)
tree 54bb010628a427b033eb8e10f5a7402af89fab48
parent af24c0d610d9a2189f86677e4f23deb372ee2bf7 (diff)
download genenetwork2-183f9a0ba19b6fcdf1475285af1bb1fcd45a9442.tar.gz
Tissue correlation results work for sample r/rho correlation page
and are written to the template
-rw-r--r-- misc/gn_installation_notes.txt 14
-rw-r--r-- wqflask/base/mrna_assay_tissue_data.py 152
-rw-r--r-- wqflask/wqflask/correlation/correlation_functions.py 12
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 65
-rw-r--r-- wqflask/wqflask/templates/correlation_page.html 9
5 files changed, 157 insertions, 95 deletions
diff --git a/misc/gn_installation_notes.txt b/misc/gn_installation_notes.txt
index 6329586f..a73e7d4f 100644
--- a/misc/gn_installation_notes.txt
+++ b/misc/gn_installation_notes.txt
@@ -272,6 +272,12 @@ sudo apt-get install r-base-dev
 
 ===========================================
 
+Installing rpy2
+
+pip install rpy2
+
+===========================================
+
 Install Parallel Python (pp)
 
 wget http://www.parallelpython.com/downloads/pp/pp-1.6.3.tar.gz
@@ -303,7 +309,13 @@ To get server running:
 !If having seemingly inexplicable problems with imports, make sure I've started the environment!
 
 Start up virtual environment:
-source ~/ve27/bin/activate
+source ~/ve27/bin/activate==============
+
+Install requests
+
+pip install requests
+
+=======================
 
 To set WQFLASK_SETTINGS environment variable:
 export WQFLASK_SETTINGS=~/zach_settings.py (or wherever file is located)
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index 8ae71858..7eb07028 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -4,11 +4,13 @@ import collections
 
 from flask import g
 
-from utility import dbtools
-from uitility import Bunch
+from utility import db_tools
+from utility import Bunch
 
 from MySQLdb import escape_string as escape
 
+from pprint import pformat as pf
+
 class MrnaAssayTissueData(object):
     
     def __init__(self, gene_symbols=None):
@@ -35,14 +37,15 @@ class MrnaAssayTissueData(object):
         # Note that inner join is necessary in this query to get distinct record in one symbol group
         # with highest mean value
         # Due to the limit size of TissueProbeSetFreezeId table in DB,
-        # performance of inner join is acceptable.
+        # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
+        print("len(gene_symbols): ", len(gene_symbols))
         if len(gene_symbols) == 0:
             query +=  '''Symbol!='' and Symbol Is Not Null group by Symbol)
                 as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                 and t.Mean = x.maxmean;  
                     '''
         else:
-            in_clause = dbtools.create_in_clause(gene_symbols)
+            in_clause = db_tools.create_in_clause(gene_symbols)
             
             query += ''' Symbol in {} group by Symbol)
                 as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
@@ -51,17 +54,19 @@ class MrnaAssayTissueData(object):
 
         results = g.db.execute(query).fetchall()
         for result in results:
-            symbol = item[0]
-            gene_symbols.append(symbol)
-            symbol = symbol.lower()
-            
-            self.data[symbol].gene_id = result.GeneId
-            self.data[symbol].data_id = result.DataId
-            self.data[symbol].chr = result.Chr
-            self.data[symbol].mb = result.Mb
-            self.data[symbol].description = result.description
-            self.data[symbol].probe_target_description = result.Probe_Target_Description
+            symbol = result[0]
+            if symbol in gene_symbols:
+            #gene_symbols.append(symbol)
+                symbol = symbol.lower()
+                
+                self.data[symbol].gene_id = result.GeneId
+                self.data[symbol].data_id = result.DataId
+                self.data[symbol].chr = result.Chr
+                self.data[symbol].mb = result.Mb
+                self.data[symbol].description = result.description
+                self.data[symbol].probe_target_description = result.Probe_Target_Description
 
+        #print("self.data: ", pf(self.data))
 
     ###########################################################################
     #Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -70,65 +75,72 @@ class MrnaAssayTissueData(object):
     #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
     #Attention! All keys are lower case!
     ###########################################################################
-    def get_symbol_value_pairs(self):
-        
-        id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data]
-    
-        symbol_value_pairs = {}
-        value_list=[]
     
-        query = """SELECT value, id
-                   FROM TissueProbeSetData
-                   WHERE Id IN {}""".format(create_in_clause(id_list))
-    
-        try :
-            results = g.db.execute(query).fetchall()
-            for result in results:
-                value_list.append(result.value)
-            symbol_value_pairs[symbol] = value_list
-        except:
-            symbol_value_pairs[symbol] = None
-    
-        #for symbol in symbol_list:
-        #    if tissue_data.has_key(symbol):
-        #        data_id = tissue_data[symbol].data_id
-        #
-        #        query = """select value, id
-        #                   from TissueProbeSetData
-        #                   where Id={}""".format(escape(data_id))
-        #        try :
-        #            results = g.db.execute(query).fetchall()
-        #            for item in results:
-        #                item = item[0]
-        #                value_list.append(item)
-        #            symbol_value_pairs[symbol] = value_list
-        #            value_list=[]
-        #        except:
-        #            symbol_value_pairs[symbol] = None
+    def get_symbol_values_pairs(self):
+        id_list = [self.data[symbol].data_id for symbol in self.data]
+
+        symbol_values_dict = {}
+        
+        query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
+                   FROM TissueProbeSetXRef, TissueProbeSetData
+                   WHERE TissueProbeSetData.Id IN {} and
+                         TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+        
+        results = g.db.execute(query).fetchall()
+        for result in results:
+            if result.Symbol.lower() not in symbol_values_dict:
+                symbol_values_dict[result.Symbol.lower()] = [result.value]
+            else:
+                symbol_values_dict[result.Symbol.lower()].append(result.value)
+
+        #for symbol in self.data:
+        #    data_id = self.data[symbol].data_id
+        #    symbol_values_dict[symbol] = self.get_tissue_values(data_id)
+        
     
-        return symbol_value_pairs
+        return symbol_values_dict
     
-    ########################################################################################################
-    #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
-    #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
-    #        key is symbol, value is one list of expression values of one probeSet.
-    #function: wrapper function for getSymbolValuePairDict function
-    #          build gene symbol list if necessary, cut it into small lists if necessary,
-    #          then call getSymbolValuePairDict function and merge the results.
-    ########################################################################################################
     
-    def get_trait_symbol_and_tissue_values(symbol_list=None):
-        tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+    #def get_tissue_values(self, data_id):
+    #    """Gets the tissue values for a particular gene"""
+    #
+    #    tissue_values=[]
+    #
+    #    query = """SELECT value, id
+    #               FROM TissueProbeSetData
+    #               WHERE Id IN {}""".format(db_tools.create_in_clause(data_id))
+    #
+    #    #try :
+    #    results = g.db.execute(query).fetchall()
+    #    for result in results:
+    #        tissue_values.append(result.value)
+    #    #symbol_values_dict[symbol] = value_list
+    #    #except:
+    #    #    symbol_values_pairs[symbol] = None
+    #
+    #    return tissue_values
     
-        #symbolList,
-        #geneIdDict,
-        #dataIdDict,
-        #ChrDict,
-        #MbDict,
-        #descDict,
-        #pTargetDescDict = getTissueProbeSetXRefInfo(
-        #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-        
-        if len(tissue_data.gene_symbols):
-            return get_symbol_value_pairs(tissue_data)
+########################################################################################################
+#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
+#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
+#        key is symbol, value is one list of expression values of one probeSet.
+#function: wrapper function for getSymbolValuePairDict function
+#          build gene symbol list if necessary, cut it into small lists if necessary,
+#          then call getSymbolValuePairDict function and merge the results.
+########################################################################################################
+
+#def get_trait_symbol_and_tissue_values(symbol_list=None):
+#    tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+#
+#    #symbolList,
+#    #geneIdDict,
+#    #dataIdDict,
+#    #ChrDict,
+#    #MbDict,
+#    #descDict,
+#    #pTargetDescDict = getTissueProbeSetXRefInfo(
+#    #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+#    
+#    if len(tissue_data.gene_symbols):
+#        return get_symbol_values_pairs(tissue_data)
             
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index 56f66810..84d47bb5 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -27,7 +27,7 @@
 from __future__ import absolute_import, print_function, division
 
 import math
-#import rpy2.robjects
+import rpy2.robjects
 import pp
 import string
 
@@ -494,7 +494,7 @@ pcor.rec <- function(x,y,z,method="p",na.rm=T){
 
 #XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait
 #XZ: The primaryTrait and targetTrait should have executed retrieveData function
-def calZeroOrderCorr (primaryTrait, targetTrait, method='pearson'):
+def calZeroOrderCorr(primaryTrait, targetTrait, method='pearson'):
 
     #primaryTrait.retrieveData()
 
@@ -560,7 +560,7 @@ def calZeroOrderCorr (primaryTrait, targetTrait, method='pearson'):
 #the same tissue order
 #####################################################################################
 
-def calZeroOrderCorrForTiss (primaryValue=[], targetValue=[], method='pearson'):
+def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pearson'):
 
     R_primary = rpy2.robjects.FloatVector(range(len(primaryValue)))
     N = len(primaryValue)
@@ -809,6 +809,9 @@ def get_trait_symbol_and_tissue_values(symbol_list=None):
     
     tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
 
+    if len(tissue_data.gene_symbols):
+        return tissue_data.get_symbol_values_pairs()
+        
     #symbolList,
     #geneIdDict,
     #dataIdDict,
@@ -818,9 +821,6 @@ def get_trait_symbol_and_tissue_values(symbol_list=None):
     #pTargetDescDict = getTissueProbeSetXRefInfo(
     #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
     
-    if len(tissue_data.gene_symbols):
-        return get_symbol_value_pairs(tissue_data)
-        
     #limit_num=1000
     #count = len(symbol_list)
     #
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index b17e1db1..b9d009af 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -105,7 +105,7 @@ class CorrelationResults(object):
 
             self.sample_data = {}
             self.corr_method = start_vars['corr_sample_method']
-            self.return_number = 500
+            self.return_number = 50
 
             #The two if statements below append samples to the sample list based upon whether the user
             #rselected Primary Samples Only, Other Samples Only, or All Samples
@@ -158,15 +158,27 @@ class CorrelationResults(object):
 
             for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
                 trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
+            
+                print("gene symbol: ", trait_object.symbol)
+                
                 trait_object.sample_r = self.correlation_data[trait][0]
                 trait_object.sample_p = self.correlation_data[trait][1]
                 trait_object.num_overlap = self.correlation_data[trait][2]
                 
+                #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef,
+                #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions)
+                
+                
+                
                 # Set some sane defaults
-                trait_object.tissue_corr = None
-                trait_object.tissue_pvalue = None
+                trait_object.tissue_corr = 0
+                trait_object.tissue_pvalue = 0
 
                 self.correlation_results.append(trait_object)
+            
+            self.do_tissue_correlation_by_list()
+            
+            print("self.correlation_results: ", pf(self.correlation_results))
                 
             
                 
@@ -183,7 +195,7 @@ class CorrelationResults(object):
                 #        mb = trait_object.mb
                 #    )
                 #    if trait_object.mean:
-                #        trait_info[mean] = trait_object.mean
+                   #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias, #        trait_info[mean] = trait_object.mean
                 #    if hasattr(trait_object, 'mean'):
                 #       trait_info[mean] = trait_object.mean
                 #    if hasattr(trait_object, 'lrs'):
@@ -197,7 +209,8 @@ class CorrelationResults(object):
                 #        correlation = float(self.correlation_data[trait][0]),
                 #        p_value = float(self.correlation_data[trait][1]),
                 #        symbol = trait_object.symbol,
-                #        alias = trait_object.alias,
+                #        alias = trai
+    #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias,
                 #        description = trait_object.description,
                 #        chromosome = trait_object.chr,
                 #        mb = trait_object.mb
@@ -637,7 +650,15 @@ class CorrelationResults(object):
         for entry in results:
             trait_name, tissue_corr, tissue_pvalue = entry
             tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
-
+    #symbolList,
+    #geneIdDict,
+    #dataIdDict,
+    #ChrDict,
+    #MbDict,
+    #descDict,
+    #pTargetDescDict = getTissueProbeSetXRefInfo(
+    #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+    
         g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
 
         return tissue_corr_dict
@@ -944,13 +965,17 @@ class CorrelationResults(object):
         return (symbolCorrDict, symbolPvalueDict)
 
 
-    def do_tissue_correlation_by_list(self, tissue_dataset_id):
+    def do_tissue_correlation_by_list(self, tissue_dataset_id=1):
+        """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each"""
 
-        trait_symbol_and_values = correlation_functions.get_trait_symbol_and_tissue_values(
-            gene_name_list = [self.this_trait.symbol])
+        #Gets tissue expression values for the primary trait
+        primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+            symbol_list = [self.this_trait.symbol])
+        
+        print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict))
 
-        if self.this_trait.symbol.lower() in trait_symbol_and_values:
-            primary_trait_value = trait_symbol_and_values[self.this_trait_symbol.lower()]
+        if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+            primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
             
             #gene_symbol_list = []
             #
@@ -960,19 +985,25 @@ class CorrelationResults(object):
             
             gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
 
-            symbol_value_dict = correlation_functions.get_trait_gene_symbol_and_tissue_values(
-                                                    gene_symbol_list=gene_symbol_list)
+            corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
+                                                    symbol_list=gene_symbol_list)
+
+            print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict))
 
             for trait in self.correlation_results:
-                if trait.symbol and trait.symbol.lower() in symbol_value_dict:
-                    this_trait_value = symbol_value_dict[trait.symbol.lower()]
+                if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict:
+                    this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()]
                     
-                    result = correlation_functions.calZeroOrderCorrForTiss(primary_trait_value,
-                                                                          this_trait_value,
+                    result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
+                                                                          this_trait_tissue_values,
                                                                           self.corr_method)
  
                     trait.tissue_corr = result[0]
                     trait.tissue_pvalue = result[2]
+                    
+                    #print("trait.tissue_corr / pvalue: ", str(trait.tissue_corr) + " :: " + str(trait.tissue_pvalue))
+                    
+
         #        else:
         #            trait.tissue_corr = None
         #            trait.tissue_pvalue = None
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index 53b12545..7082dbf2 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -28,10 +28,15 @@
                 <th>Sample r</th>
                 <th>N Cases</th>
                 <th>Sample p(r)</th>
+                <th>Tissue r</th>
+                <th>Tissue p(r)</th>
                 {% else %}
                 <th>Sample rho</th>
                 <th>Sample p(rho)</th>
+                <th>Tissue rho</th>
+                <th>Tissue p(rho)</th>
                 {% endif %}
+            
             </tr>
         </thead>
         <tbody>
@@ -41,13 +46,15 @@
                 <td>{{ trait.symbol }}</td>
                 <td>{{ trait.alias }}</td>
                 <td>{{ trait.description }}</td>
-                <td>Chr{{ trait.chr }}:{{'%0.6f'|format(trait.mb)}}</td>
+                <td>Chr{{ trait.chr }}:{{trait.mb}}</td>
                 <td>{{'%0.3f'|format(trait.mean)}}</td>
                 <td>{{'%0.3f'|format(trait.lrs)}}</td>
                 <td>Chr{{ trait.locus_chr }}:{{'%0.6f'|format(trait.locus_mb)}}</td>
                 <td>{{'%0.3f'|format(trait.sample_r)}}</td>
                 <td>{{ trait.num_overlap }}</td>
                 <td>{{'%0.3e'|format(trait.sample_p)}}</td>
+                <td>{{'%0.3f'|format(trait.tissue_corr)}}</td>
+                <td>{{'%0.3e'|format(trait.tissue_pvalue)}}</td>
             </tr>
         {% endfor %}
         </tbody>