about summary refs log tree commit diff
diff options
context:
space:
mode:
author Lei Yan 2013-10-08 17:50:08 -0500
committer Lei Yan 2013-10-08 17:50:08 -0500
commit 9173f1e03f51cb141b0efa35b5e81c632b9a2689 (patch)
tree 0655276dbfce12857462dfe0e392951d6b8de23b
parent 58327f74caa0616b1f6401a1154c03e87ae5e7bf (diff)
download genenetwork2-9173f1e03f51cb141b0efa35b5e81c632b9a2689.tar.gz
Literature correlation works when it is selected as the sorted
correlation type (that is, when it is run against all traits in a
database)

Added a function to data_set.py that gets all the gene_ids in the
data set. Not sure if having a separate function for getting
the gene_ids and another for getting the gene symbols makes sense.
-rwxr-xr-x wqflask/base/data_set.py 15
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 65
2 files changed, 67 insertions, 13 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 5d21c901..16f9da5d 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -1078,7 +1078,20 @@ class MrnaAssayDataSet(DataSet):
 
     def retrieve_gene_symbols(self):
         query = """
-                    select ProbeSet.Name, ProbeSet.Symbol
+                    select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId
+                    from ProbeSet,ProbeSetXRef
+                    where ProbeSetXRef.ProbeSetFreezeId = %s and
+                    ProbeSetXRef.ProbeSetId=ProbeSet.Id;
+                """ % (self.id)
+        results = g.db.execute(query).fetchall()
+        symbol_dict = {}
+        for item in results:
+            symbol_dict[item[0]] = item[1]
+        return symbol_dict
+    
+    def retrieve_gene_ids(self):
+        query = """
+                    select ProbeSet.Name, ProbeSet.GeneId
                     from ProbeSet,ProbeSetXRef
                     where ProbeSetXRef.ProbeSetFreezeId = %s and
                     ProbeSetXRef.ProbeSetId=ProbeSet.Id;
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 42d5acd6..5df2f316 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -133,7 +133,7 @@ class CorrelationResults(object):
 
             if self.corr_type == "tissue":
                 trait_symbol_dict = self.dataset.retrieve_gene_symbols()
-                tissue_corr_data = self.do_tissue_corr_for_all_traits(trait_gene_symbols = trait_symbol_dict)
+                tissue_corr_data = self.do_tissue_correlation_for_all_traits(trait_gene_symbols = trait_symbol_dict)
                 #print("tissue_corr_data: ", pf(tissue_corr_data))
                 
                 for trait in tissue_corr_data.keys()[:self.return_number]:
@@ -158,8 +158,12 @@ class CorrelationResults(object):
                     #self.correlation_data[trait] = [sample_r, sample_p, num_overlap]   
  
             elif self.corr_type == "lit":
-                trait_symbol_dict = self.dataset.retrieve_gene_symbols()
+                trait_geneid_dict = self.dataset.retrieve_gene_ids()
+                lit_corr_data = self.do_lit_correlation_for_all_traits(trait_gene_ids = trait_geneid_dict)
                 
+                for trait in lit_corr_data.keys()[:self.return_number]:
+                    self.get_sample_r_and_p_values(trait = trait, target_samples = self.target_dataset.trait_data[trait])
+                    
             elif self.corr_type == "sample":
                 for trait, values in self.target_dataset.trait_data.iteritems():
                     self.get_sample_r_and_p_values(trait = trait, target_samples = values)
@@ -181,13 +185,15 @@ class CorrelationResults(object):
                 #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef,
                 #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions)
                 
+                # Set some sane defaults
+                trait_object.tissue_corr = 0
+                trait_object.tissue_pvalue = 0
+                trait_object.lit_corr = 0
                 if self.corr_type == "tissue":
                     trait_object.tissue_corr = tissue_corr_data[trait][1]
                     trait_object.tissue_pvalue = tissue_corr_data[trait][2]
-                else:
-                    # Set some sane defaults
-                    trait_object.tissue_corr = 0
-                    trait_object.tissue_pvalue = 0
+                elif self.corr_type == "lit":    
+                    trait_object.lit_corr = lit_corr_data[trait][1]
                     
                 self.correlation_results.append(trait_object)
             
@@ -299,7 +305,7 @@ class CorrelationResults(object):
         #return self.correlation_results
 
 
-    def do_tissue_corr_for_all_traits(self, trait_gene_symbols, tissue_dataset_id=1):
+    def do_tissue_correlation_for_all_traits(self, trait_gene_symbols, tissue_dataset_id=1):
         #Gets tissue expression values for the primary trait
         primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
             symbol_list = [self.this_trait.symbol])
@@ -336,7 +342,7 @@ class CorrelationResults(object):
     def do_lit_correlation_for_trait_list(self):
 
         input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
-
+        
         for trait in self.correlation_results:
 
             if trait.geneid:
@@ -350,7 +356,7 @@ class CorrelationResults(object):
                        FROM LCorrRamin3
                        WHERE GeneId1='%s' and
                              GeneId2='%s'
-                    """ % (escape(trait.mouse_gene_id), escape(self.this_trait.geneid))
+                    """ % (escape(trait.mouse_gene_id), escape(input_trait_mouse_gene_id))
                 ).fetchone()
                 if not result:
                     result = g.db.execute("""SELECT value
@@ -361,9 +367,7 @@ class CorrelationResults(object):
                     ).fetchone()
                 
                 if result:
-                    lit_corr = result.value
-                     
-                if lit_corr:
+                    lit_corr = result.value 
                     trait.lit_corr = lit_corr
                 else:
                     trait.lit_corr = 0
@@ -371,6 +375,43 @@ class CorrelationResults(object):
                 trait.lit_corr = 0
     
     
+    def do_lit_correlation_for_all_traits(self, trait_gene_ids):
+        input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
+        
+        lit_corr_data = {}
+        for trait, gene_id in trait_gene_ids.iteritems():
+            mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id)
+
+            if mouse_gene_id and str(mouse_gene_id).find(";") == -1:
+                print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id)
+                result = g.db.execute(
+                    """SELECT value
+                       FROM LCorrRamin3
+                       WHERE GeneId1='%s' and
+                             GeneId2='%s'
+                    """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))
+                ).fetchone()
+                if not result:
+                    result = g.db.execute("""SELECT value
+                       FROM LCorrRamin3
+                       WHERE GeneId2='%s' and
+                             GeneId1='%s'
+                    """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))
+                    ).fetchone()
+                if result:
+                    print("result:", result)
+                    lit_corr = result.value
+                    lit_corr_data[trait] = [gene_id, lit_corr]
+                else:
+                    lit_corr_data[trait] = [gene_id, 0]
+            else:
+                lit_corr_data[trait] = [gene_id, 0]
+        
+        lit_corr_data = collections.OrderedDict(sorted(lit_corr_data.items(),
+                                                           key=lambda t: -abs(t[1][1])))
+            
+        return lit_corr_data
+        
     def convert_to_mouse_gene_id(self, species=None, gene_id=None):
         """If the species is rat or human, translate the gene_id to the mouse geneid