Rewrote some code in get_trait_info in dataset.py

Added spearman correlation to show_corr_results and template
author: Lei Yan 2013-05-30 23:14:50 +0000
committer: Lei Yan 2013-05-30 23:14:50 +0000
commit: 466be48f92d4943995c7a3e7bcb9fd1efd775bf6 (patch)
tree: 3afd09770d889176cf96b9be4425daddda00626d
parent: cb639316fe007c8bcad731976e8b095dee59115e (diff)
download: genenetwork2-466be48f92d4943995c7a3e7bcb9fd1efd775bf6.tar.gz
4 files changed, 126 insertions, 89 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index c2380f8c..4c5c46a5 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -672,12 +672,13 @@ class MrnaAssayDataSet(DataSet):
             query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
                                                                      self.type,
                                                                      self.type))
-            #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
+
             for item in sample_ids_step:
                 query += """
                         left join {}Data as T{} on T{}.Id = {}XRef.DataId
                         and T{}.StrainId={}\n
                         """.format(*mescape(self.type, item, item, self.type, item, item))
+                        
             query += """
                     WHERE {}XRef.{}FreezeId = {}Freeze.Id
                     and {}Freeze.Name = '{}'
@@ -690,17 +691,19 @@ class MrnaAssayDataSet(DataSet):
 
         trait_count = len(trait_sample_data[0])
         self.trait_data = collections.defaultdict(list)
+        
         # put all of the separate data together into a dictionary where the keys are
         # trait names and values are lists of sample values
-        for j in range(trait_count):
-            trait_name = trait_sample_data[0][j][0]
-            for i in range(int(number_chunks)):
-                self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:]
-
+        for trait_counter in range(trait_count):
+            trait_name = trait_sample_data[0][trait_counter][0]
+            for chunk_counter in range(int(number_chunks)):
+                self.trait_data[trait_name] += (
+                    trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
+    
 
     def get_trait_info(self, trait_list=None, species=''):
 
-        #  Note: setting trait_list to [] is probably not a great idea.
+        #  Note: setting trait_list to [] is probably not a great idea. 
         if not trait_list:
             trait_list = []
 
@@ -709,9 +712,7 @@ class MrnaAssayDataSet(DataSet):
             if not this_trait.haveinfo:
                 this_trait.retrieveInfo(QTL=1)
 
-            if this_trait.symbol:
-                pass
-            else:
+            if not this_trait.symbol:
                 this_trait.symbol = "N/A"
 
             #XZ, 12/08/2008: description
@@ -719,60 +720,56 @@ class MrnaAssayDataSet(DataSet):
             description_string = str(this_trait.description).strip()
             target_string = str(this_trait.probe_target_description).strip()
 
-            description_display = ''
-
             if len(description_string) > 1 and description_string != 'None':
                 description_display = description_string
             else:
                 description_display = this_trait.symbol
 
-            if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None':
+            if (len(description_display) > 1 and description_display != 'N/A' and
+                    len(target_string) > 1 and target_string != 'None'):
                 description_display = description_display + '; ' + target_string.strip()
 
             # Save it for the jinja2 template
             this_trait.description_display = description_display
-            #print("  xxxxdd [%s]: %s" % (type(this_trait.description_display), description_display))
 
             #XZ: trait_location_value is used for sorting
             trait_location_repr = 'N/A'
             trait_location_value = 1000000
 
             if this_trait.chr and this_trait.mb:
-                try:
-                    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
-                except:
-                    if this_trait.chr.upper() == 'X':
-                        trait_location_value = 20*1000 + this_trait.mb
-                    else:
-                        trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
-
-                this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) )
+                #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+                #This is so we can convert the location to a number used for sorting
+                trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb)
+                #try:
+                #    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
+                #except ValueError:
+                #    if this_trait.chr.upper() == 'X':
+                #        trait_location_value = 20*1000 + this_trait.mb
+                #    else:
+                #        trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 +
+                #                               this_trait.mb)
+
+                #ZS: Put this in function currently called "convert_location_to_value"
+                this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr,
+                                                                float(this_trait.mb))
                 this_trait.location_value = trait_location_value
-                #this_trait.trait_location_value = trait_location_value
 
-            #XZ, 01/12/08: This SQL query is much faster.
+            #Get mean expression value
             query = (
-"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
-    where ProbeSetXRef.ProbeSetFreezeId = %s and
-    ProbeSet.Id = ProbeSetXRef.ProbeSetId and
-    ProbeSet.Name = '%s'
+            """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
+                where ProbeSetXRef.ProbeSetFreezeId = %s and
+                ProbeSet.Id = ProbeSetXRef.ProbeSetId and
+                ProbeSet.Name = '%s'
             """ % (escape(str(this_trait.dataset.id)),
                    escape(this_trait.name)))
 
             print("query is:", pf(query))
 
             result = g.db.execute(query).fetchone()
+            
+            mean = result[0] if result else 0
 
-            if result:
-                if result[0]:
-                    mean = result[0]
-                else:
-                    mean=0
-            else:
-                mean = 0
-
-            #XZ, 06/05/2009: It is neccessary to turn on nowrap
-            this_trait.mean = repr = "%2.3f" % mean
+            this_trait.mean = "%2.3f" % mean
 
             #LRS and its location
             this_trait.LRS_score_repr = 'N/A'
@@ -791,23 +788,39 @@ class MrnaAssayDataSet(DataSet):
                 result = self.cursor.fetchone()
 
                 if result:
-                    if result[0] and result[1]:
-                        LRS_Chr = result[0]
-                        LRS_Mb = result[1]
-
-                        #XZ: LRS_location_value is used for sorting
-                        try:
-                            LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
-                        except:
-                            if LRS_Chr.upper() == 'X':
-                                LRS_location_value = 20*1000 + float(LRS_Mb)
-                            else:
-                                LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
+                    #if result[0] and result[1]:
+                    #    lrs_chr = result[0]
+                    #    lrs_mb = result[1]
+                    lrs_chr, lrs_mb = result
+                    #XZ: LRS_location_value is used for sorting
+                    lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb)
+                    
+                    #try:
+                    #    lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb)
+                    #except:
+                    #    if lrs_chr.upper() == 'X':
+                    #        lrs_location_value = 20*1000 + float(lrs_mb)
+                    #    else:
+                    #        lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 +
+                    #                              float(lrs_mb))
+
+                    this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
+                    this_trait.LRS_score_value = this_trait.lrs
+                    this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
+      
+
+    def convert_location_to_value(chromosome, mb):
+        try:
+            location_value = int(chromosome)*1000 + float(mb)
+        except ValueError:
+            if chromosome.upper() == 'X':
+                location_value = 20*1000 + float(mb)
+            else:
+                location_value = (ord(str(chromosome).upper()[0])*1000 +
+                                  float(mb))
+        
+        return location_value
 
-                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
-                        this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
-                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
-                        
     def get_sequence(self):
         query = """
                     SELECT
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 7c1c035c..5fde114f 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -15,7 +15,7 @@ from pprint import pformat as pf
 
 from flask import Flask, g
 
-class GeneralTrait:
+class GeneralTrait(object):
     """
     Trait class defines a trait in webqtl, can be either Microarray,
     Published phenotype, genotype, or user input trait
@@ -78,7 +78,7 @@ class GeneralTrait:
                 #desc = self.handle_pca(desc)
                 stringy = desc
         return stringy
-    
+
 
 
     def display_name(self):
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index aa20eba1..5d40c835 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -30,7 +30,6 @@
 from __future__ import absolute_import, print_function, division
 
 import string
-from math import *
 import cPickle
 import os
 import time
@@ -106,6 +105,7 @@ class CorrelationResults(object):
             corr_samples_group = start_vars['corr_samples_group']
     
             self.sample_data = {}
+            self.corr_method = start_vars['corr_sample_method']
     
             #The two if statements below append samples to the sample list based upon whether the user
             #rselected Primary Samples Only, Other Samples Only, or All Samples
@@ -123,27 +123,31 @@ class CorrelationResults(object):
             #if statement if the user selected All Samples)
             if corr_samples_group != 'samples_primary':
                 self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
+                
             self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
             self.target_dataset.get_trait_data()
+            
             self.correlation_data = {}
             for trait, values in self.target_dataset.trait_data.iteritems():
-                trait_values = []
+                this_trait_values = []
                 target_values = []
                 for index, sample in enumerate(self.target_dataset.samplelist):
-                    target_value = values[index]
-                    if sample in self.sample_data.keys():
-                        this_value = self.sample_data[sample]
-                        trait_values.append(this_value)
-                        target_values.append(target_value)
-                (trait_values, target_values) = normalize_values(trait_values, target_values)
-                correlation = scipy.stats.pearsonr(trait_values, target_values)
-                #correlation = cal_correlation(trait_values, target_values)
-                self.correlation_data[trait] = correlation[0]
-                #print ('correlation result: %s %s' % (trait, correlation))
-        
-        for trait in self.correlation_data:
-            print("correlation: ", self.correlation_data[trait])
-        
+                    if sample in self.sample_data:
+                        sample_value = self.sample_data[sample]
+                        target_sample_value = values[index]
+                        this_trait_values.append(sample_value)
+                        target_values.append(target_sample_value)
+
+                this_trait_values, target_values = normalize_values(this_trait_values, target_values)
+                if self.corr_method == 'pearson':
+                    sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values)
+                else:
+                    sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values)
+                self.correlation_data[trait] = [sample_r, sample_p]
+            self.correlation_data = collections.OrderedDict(
+                sorted(self.correlation_data.items(),
+                        key=lambda t: -abs(t[1][0])))
+
 
         #XZ, 09/18/2008: get all information about the user selected database.
         #target_db_name = fd.corr_dataset
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index be750a0c..68fe81ed 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -1,21 +1,42 @@
 {% extends "base.html" %}
-{% block content %}
-<table id="corr_results" class="table table-hover table-striped table-bordered">
-    <thead>
-        <tr>
-            <td>Correlation</td>
-        </tr>
-    </thead>
-    <tbody>
-    {% for trait in correlation_data %}
-        <tr>
-            <td>{{ correlation_data[trait] }}</td>
-        </tr>
-    {% endfor %}
-    </tbody>
-</table>
+{% block css %}
+    <link rel="stylesheet" type="text/css" href="/static/packages/jqplot/jquery.jqplot.min.css" />
+    <link rel="stylesheet" type="text/css" href="/static/new/packages/DataTables/css/jquery.dataTables.css" />
+    <link rel="stylesheet" type="text/css" href="/static/packages/DT_bootstrap/DT_bootstrap.css" />
+    <link rel="stylesheet" type="text/css" href="/static/packages/TableTools/media/css/TableTools.css" />
 {% endblock %}
+{% block content %}
 
+    <header class="jumbotron subhead" id="overview">
+        <div class="container">
+            <h1>Correlation</h1>
+        </div>
+    </header>
+
+    <table id="corr_results" class="table table-hover table-striped table-bordered">
+        <thead>
+            <tr>
+                <th>Trait</th>
+                {% if corr_method == 'pearson' %}
+                <th>Sample r</th>
+                <th>Sample p(r)</th>
+                {% else %}
+                <th>Sample rho</th>
+                <th>Sample p(rho)</th>
+                {% endif %}
+            </tr>
+        </thead>
+        <tbody>
+        {% for trait in correlation_data %}
+            <tr>
+                <td>{{ trait }}</td>
+                <td>{{ correlation_data[trait][0] }}</td>
+                <td>{{ correlation_data[trait][1] }}</td>
+            </tr>
+        {% endfor %}
+        </tbody>
+    </table>
+{% endblock %}
 
 {% block js %}  
     <script language="javascript" type="text/javascript" src="/static/new/packages/DataTables/js/jquery.js"></script>
@@ -23,7 +44,6 @@
     <script language="javascript" type="text/javascript" src="/static/packages/DT_bootstrap/DT_bootstrap.js"></script>
     <script language="javascript" type="text/javascript" src="/static/packages/TableTools/media/js/TableTools.min.js"></script>
     <script language="javascript" type="text/javascript" src="/static/packages/underscore/underscore-min.js"></script>
-
     <script type="text/javascript" charset="utf-8">
         $(document).ready( function () {
             console.time("Creating table");
author	Lei Yan	2013-05-30 23:14:50 +0000
committer	Lei Yan	2013-05-30 23:14:50 +0000
commit	466be48f92d4943995c7a3e7bcb9fd1efd775bf6 (patch)
tree	3afd09770d889176cf96b9be4425daddda00626d
parent	cb639316fe007c8bcad731976e8b095dee59115e (diff)
download	genenetwork2-466be48f92d4943995c7a3e7bcb9fd1efd775bf6.tar.gz