Worked on correlation page

Wrote a function in data_set.py that gets all the traits in a dataset and their sample values
author: Lei Yan 2013-05-08 22:16:26 +0000
committer: Lei Yan 2013-05-08 22:16:26 +0000
commit: b4371ef0d96605187b7474e7e4844dbebab67d8b (patch)
tree: 14b68ddaa9916494551c4306c63b1a7719f7d993 /wqflask
parent: 34312cfacdcb5af450d33ac3b54d7c01a6a61788 (diff)
download: genenetwork2-b4371ef0d96605187b7474e7e4844dbebab67d8b.tar.gz
10 files changed, 431 insertions, 229 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 111597a9..1520b180 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -23,6 +23,8 @@
 from __future__ import absolute_import, print_function, division
 import os
 import math
+import string
+import collections
 
 import json
 import itertools
@@ -49,23 +51,29 @@ def create_dataset(dataset_name):
     query = """
         SELECT DBType.Name
         FROM DBList, DBType
-        WHERE DBList.Name = '%s' and
+        WHERE DBList.Name = '{}' and
               DBType.Id = DBList.DBTypeId
-        """ % (escape(dataset_name))
-    print("query is: ", pf(query))
+        """.format(escape(dataset_name))
+    #print("query is: ", pf(query))
     dataset_type = g.db.execute(query).fetchone().Name
 
     #dataset_type = cursor.fetchone()[0]
-    print("[blubber] dataset_type:", pf(dataset_type))
+    #print("[blubber] dataset_type:", pf(dataset_type))
 
     dataset_ob = DS_NAME_MAP[dataset_type]
     #dataset_class = getattr(data_set, dataset_ob)
-    print("dataset_ob:", dataset_ob)
-    print("DS_NAME_MAP:", pf(DS_NAME_MAP))
+    #print("dataset_ob:", dataset_ob)
+    #print("DS_NAME_MAP:", pf(DS_NAME_MAP))
 
     dataset_class = globals()[dataset_ob]
     return dataset_class(dataset_name)
 
+def mescape(*items):
+    """Multiple escape"""
+    escaped = [escape(item) for item in items]
+    print("escaped is:", escaped)
+    return escaped
+
 
 class Markers(object):
     """Todo: Build in cacheing so it saves us reading the same file more than once"""
@@ -74,15 +82,21 @@ class Markers(object):
         self.markers = json.load(json_data_fh)
     
     def add_pvalues(self, p_values):
+        print("length of self.markers:", len(self.markers))
+        print("length of p_values:", len(p_values))
+        
         # THIS IS only needed for the case when we are limiting the number of p-values calculated
-        if len(self.markers) > len(p_values):
+        if len(self.markers) < len(p_values):
             self.markers = self.markers[:len(p_values)]
         
         for marker, p_value in itertools.izip(self.markers, p_values):
             marker['p_value'] = p_value
+            print("p_value is:", marker['p_value'])
             marker['lod_score'] = -math.log10(marker['p_value'])
             #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
             marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+        
+        
 
 
 class HumanMarkers(Markers):
@@ -93,9 +107,9 @@ class HumanMarkers(Markers):
         for line in marker_data_fh:
             splat = line.strip().split()
             marker = {}
-            marker['chr'] = splat[0]
+            marker['chr'] = int(splat[0])
             marker['name'] = splat[1]
-            marker['Mb'] = str(float(splat[3]) / 1000000)
+            marker['Mb'] = float(splat[3]) / 1000000
             self.markers.append(marker)
             
         #print("markers is: ", pf(self.markers))
@@ -116,8 +130,7 @@ class HumanMarkers(Markers):
         with Bench("deleting markers"):
             markers = []
             for marker in self.markers:
-                #if not float(marker['Mb']) <= 0 or not float(marker['chr']) == 0:
-                if float(marker['Mb']) > 0 and marker['chr'] != "0":
+                if not marker['Mb'] <= 0 and not marker['chr'] == 0:
                     markers.append(marker)
             self.markers = markers
         
@@ -349,6 +362,19 @@ class PhenotypeDataSet(DataSet):
         # (Urgently?) Need to write this
         pass
 
+    def get_trait_list(self):
+        query = """
+            select PublishXRef.Id
+            from PublishXRef, PublishFreeze
+            where PublishFreeze.InbredSetId=PublishXRef.InbredSetId
+            and PublishFreeze.Id = {}
+            """.format(escape(str(self.id)))
+        results = g.db.execute(query).fetchall()
+        trait_data = {}
+        for trait in results:
+            trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+        return trait_data
+
     def get_trait_info(self, trait_list, species = ''):
         for this_trait in trait_list:
             if not this_trait.haveinfo:
@@ -359,9 +385,7 @@ class PhenotypeDataSet(DataSet):
                 continue   # for now
                 if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
                     description = this_trait.pre_publication_description
-            this_trait.description_display = description.decode('utf-8')
-            
-            
+            this_trait.description_display = description
 
             if not this_trait.year.isdigit():
                 this_trait.pubmed_text = "N/A"
@@ -419,7 +443,7 @@ class PhenotypeDataSet(DataSet):
                             PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
                     Order BY
                             Strain.Name
-                    """ % (trait.name, self.id)
+                    """ % (trait, self.id)
         results = g.db.execute(query).fetchall()
         return results
 
@@ -459,6 +483,19 @@ class GenotypeDataSet(DataSet):
 
     def check_confidentiality(self):
         return geno_mrna_confidentiality(self)
+    
+    def get_trait_list(self):
+        query = """
+            select Geno.Name
+            from Geno, GenoXRef
+            where GenoXRef.GenoId = Geno.Id
+            and GenoFreezeId = {}
+            """.format(escape(str(self.id)))
+        results = g.db.execute(query).fetchall()
+        trait_data = {}
+        for trait in results:
+            trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+        return trait_data
 
     def get_trait_info(self, trait_list, species=None):
         for this_trait in trait_list:
@@ -497,7 +534,7 @@ class GenotypeDataSet(DataSet):
                             GenoData.StrainId = Strain.Id
                     Order BY
                             Strain.Name
-                    """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait.name, self.name)
+                    """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)
         results = g.db.execute(query).fetchall()
         return results
 
@@ -569,7 +606,95 @@ class MrnaAssayDataSet(DataSet):
 
     def check_confidentiality(self):
         return geno_mrna_confidentiality(self)
+        
+    def get_trait_list_1(self):
+        query = """
+            select ProbeSet.Name
+            from ProbeSet, ProbeSetXRef
+            where ProbeSetXRef.ProbeSetId = ProbeSet.Id
+            and ProbeSetFreezeId = {}
+            """.format(escape(str(self.id)))
+        results = g.db.execute(query).fetchall()
+        print("After get_trait_list query")
+        trait_data = {}
+        for trait in results:
+            print("Retrieving sample_data for ", trait[0])
+            trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+        print("After retrieve_sample_data")
+        return trait_data
+    
+    def get_trait_data(self):
+        sample_ids = []
+        for sample in self.group.samplelist:
+            query = """
+                SELECT Strain.Id FROM Strain, Species
+                WHERE Strain.Name = '{}'
+                and Strain.SpeciesId=Species.Id
+                and Species.name = '{}'
+                """.format(*mescape(sample, self.group.species))
+            this_id = g.db.execute(query).fetchone()[0]
+            sample_ids.append('%d' % this_id)
+        print("sample_ids size: ", len(sample_ids))
+
+        # MySQL limits the number of tables that can be used in a join to 61,
+        # so we break the sample ids into smaller chunks
+        chunk_count = 50
+        n = len(sample_ids) / chunk_count
+        if len(sample_ids) % chunk_count:
+            n += 1
+        print("n: ", n)
+        #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId 
+        #tempTable = None
+        #if GeneId and db.type == "ProbeSet": 
+        #    if method == "3":
+        #        tempTable = self.getTempLiteratureTable(species=species,
+        #                                                input_species_geneid=GeneId,
+        #                                                returnNumber=returnNumber)
+        #
+        #    if method == "4" or method == "5":
+        #        tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
+        #                                        TissueProbeSetFreezeId=tissueProbeSetFreezeId,
+        #                                        method=method,
+        #                                        returnNumber=returnNumber)
+        trait_sample_data = []
+        for step in range(int(n)):
+            temp = []
+            sample_ids_step = sample_ids[step*chunk_count:min(len(sample_ids), (step+1)*chunk_count)]
+            for item in sample_ids_step:
+                temp.append('T%s.value' % item)
+            query = "SELECT {}.Name,".format(escape(self.type))
+            data_start_pos = 1
+            query += string.join(temp, ', ')
+            query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
+                                                                     self.type,
+                                                                     self.type))
+            #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
+            for item in sample_ids_step:
+                query += """
+                        left join {}Data as T{} on T{}.Id = {}XRef.DataId
+                        and T{}.StrainId={}\n
+                        """.format(*mescape(self.type, item, item, self.type, item, item))
+            query += """
+                    WHERE {}XRef.{}FreezeId = {}Freeze.Id
+                    and {}Freeze.Name = '{}'
+                    and {}.Id = {}XRef.{}Id
+                    order by {}.Id
+                    """.format(*mescape(self.type, self.type, self.type, self.type,
+                               self.name, self.type, self.type, self.type, self.type))
+            print("query: ", query)
+            results = g.db.execute(query).fetchall()
+            trait_sample_data.append(results)
+            
+        trait_count = len(trait_sample_data[0])
+        self.trait_data = collections.defaultdict(list)
+        # put all of the separate data together into a dictionary where the keys are
+        # trait names and values are lists of sample values
+        for j in range(trait_count):
+            trait_name = trait_sample_data[0][j][0]
+            for i in range(int(n)):
+                self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:]
 
+    
     def get_trait_info(self, trait_list=None, species=''):
 
         #  Note: setting trait_list to [] is probably not a great idea.
@@ -693,9 +818,9 @@ class MrnaAssayDataSet(DataSet):
                             ProbeSetFreeze.Name = %s
                 """ % (escape(self.name), escape(self.dataset.name))
         results = g.db.execute(query).fetchone()
-
         return results[0]
     
+   
     def retrieve_sample_data(self, trait):
         query = """
                     SELECT
@@ -712,7 +837,7 @@ class MrnaAssayDataSet(DataSet):
                             ProbeSetData.StrainId = Strain.Id
                     Order BY
                             Strain.Name
-                    """ % (escape(trait.name), escape(self.name))
+                    """ % (escape(trait), escape(self.name))
         results = g.db.execute(query).fetchall()
         return results
 
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 2af4bc24..7c1c035c 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -208,7 +208,7 @@ class GeneralTrait:
         #            ''' % (self.cellid, self.name, self.dataset.name)
         #            
         #else:
-        results = self.dataset.retrieve_sample_data(self)
+        results = self.dataset.retrieve_sample_data(self.name)
 
         # Todo: is this necessary? If not remove
         self.data.clear()
diff --git a/wqflask/other_config/wqflask-nginx.conf b/wqflask/other_config/wqflask-nginx.conf
index 50f9d73c..4e6fd0d9 100644
--- a/wqflask/other_config/wqflask-nginx.conf
+++ b/wqflask/other_config/wqflask-nginx.conf
@@ -2,7 +2,7 @@ server {
     # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/
     listen 80;
 
-    server_name _;
+    server_name gn2python.genenetwork.org;
 
     access_log  /var/log/nginx/access.log;
     error_log  /var/log/nginx/error.log;
@@ -37,6 +37,47 @@ server {
             proxy_set_header   X-Forwarded-For  $proxy_add_x_forwarded_for;
             
             proxy_read_timeout 40m;
-     }
+    }
+}
+
+server {
+    # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/
+    listen 80;
+
+    server_name gn2python_lei.genenetwork.org;
+
+    access_log  /var/log/nginx/lei_access.log;
+    error_log  /var/log/nginx/lei_error.log;
+
+    location ^~ /css/ {
+            root /gene/wqflask/wqflask/static/;
+    }
+
+    location ^~ /javascript/ {
+            root /gene/wqflask/wqflask/static/;
+    }
+
+#    location ^~ /image/ {
+#           root /gene/wqflask/wqflask/static/;
+#       }
+
+    location ^~ /images/ {
+            root /gene/wqflask/wqflask/static/;
+    }
+    
+    ### New - added by Sam
+    #location ^~ /static/ {
+    #       root /gene/wqflask/wqflask/static/;
+    #}
 
+    location / {
+            proxy_pass         http://127.0.0.1:5001/;
+            proxy_redirect     off;
+
+            proxy_set_header   Host             $host;
+            proxy_set_header   X-Real-IP        $remote_addr;
+            proxy_set_header   X-Forwarded-For  $proxy_add_x_forwarded_for;
+            
+            proxy_read_timeout 40m;
+    }
 }
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 96298b37..1d0368cc 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -54,6 +54,7 @@ from dbFunction import webqtlDatabaseFunction
 import utility.webqtlUtil #this is for parallel computing only.
 from wqflask.correlation import correlationFunction
 
+from pprint import pformat as pf
 
 METHOD_SAMPLE_PEARSON = "1"
 METHOD_SAMPLE_RANK = "2"
@@ -92,7 +93,8 @@ class Trait(object):
     def calculate_correlation(self, values, method):
         """Calculate the correlation value and p value according to the method specified"""
 
-        #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for
+        #ZS: This takes the list of values of the trait our selected trait is being correlated
+        #against and removes the values of the samples our trait has no value for
         #There's probably a better way of dealing with this, but I'll have to ask Christian
         updated_raw_values = []
         updated_values = []
@@ -276,57 +278,48 @@ class CorrelationResults(object):
     #    return templatePage.error(heading = heading, detail = [message], error=error)
 
     def __init__(self, start_vars):
-        #self.dataset = create_dataset(start_vars['dataset_name'])
-        #self.dataset.group.read_genotype_file()
-        #self.genotype = self.dataset.group.genotype
-        #
+        # get trait list from db (database name)
+        # calculate correlation with Base vector and targets
+        
         #self.this_trait = GeneralTrait(dataset=self.dataset.name,
         #                               name=start_vars['trait_id'],
         #                               cellid=None)                
         
-        helper_functions.get_dataset_and_trait(self, start_vars)
+        print("start_vars: ", pf(start_vars))
+        
+        helper_functions.get_species_dataset_trait(self, start_vars)
+        self.dataset.group.read_genotype_file()
         
         self.samples = []   # Want only ones with values
         self.vals = []
-        self.variances = []
 
         corr_samples_group = start_vars['corr_samples_group']
+
+        #The two if statements below append samples to the sample list based upon whether the user
+        #selected Primary Samples Only, Other Samples Only, or All Samples
+
+        #If either BXD/whatever Only or All Samples, append all of that group's samplelist      
         if corr_samples_group != 'samples_other':
             self.process_samples(start_vars, self.dataset.group.samplelist, ())
-            #for sample in self.dataset.group.samplelist:
-            #    value = start_vars['value:' + sample]
-            #    variance = start_vars['variance:' + sample]
-            #    if variance.strip().lower() == 'x':
-            #        variance = 0
-            #    else:
-            #        variance = float(variance)
-            #    if value.strip().lower() != 'x':
-            #        self.samples.append(str(sample))
-            #        self.vals.append(float(value))
-            #        self.variances.append(variance)
         
+        #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
+        #exclude the primary samples (because they would have been added in the previous
+        #if statement if the user selected All Samples)
         if corr_samples_group != 'samples_primary':
             primary_samples = (self.dataset.group.parlist +
                                    self.dataset.group.f1list +
                                    self.dataset.group.samplelist)
             self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
-            #for sample in self.this_trait.data.keys():
-            #    if sample not in primary_samples:
-            #        value = start_vars['value:' + sample]
-            #        variance = start_vars['variance:' + sample]
-            #        if variance.strip().lower() == 'x':
-            #            variance = 0
-            #        else:
-            #            variance = float(variance)
-            #        if value.strip().lower() != 'x':
-            #            self.samples.append(str(sample))
-            #            self.vals.append(float(value))
-            #            self.variances.append(variance)
-
-        print("self.samples is:", pf(self.samples))
-
-        #sample_list = get_sample_data(fd)
-        #print("sample_list is", pf(sample_list))
+
+        #for i, sample in enumerate(self.samples):
+        #    print("{} : {}".format(sample, self.vals[i]))
+    
+        self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
+        self.target_dataset.get_trait_data()
+        print("trait_list: {}".format(pf(self.target_dataset.trait_data)))
+        # Lei Yan todo
+        for trait, values in self.target_dataset.trait_data.iteritems():
+            correlation = calCorrelation(values, )
 
         #XZ, 09/18/2008: get all information about the user selected database.
         #target_db_name = fd.corr_dataset
@@ -753,6 +746,39 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php');
         else:
             self.dict['body'] = ""
 
+    def get_all_dataset_data(self):
+        
+        """
+        SELECT ProbeSet.Name, T128.value, T129.value, T130.value, T131.value, T132.value, T134.value, T135.value, T138.value, T139.value, T140.value, T141.value, T142.value, T144
+        .value, T145.value, T147.value, T148.value, T149.value, T487.value, T919.value, T920.value, T922.value
+        FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+        left join ProbeSetData as T128 on T128.Id = ProbeSetXRef.DataId and T128.StrainId=128
+        left join ProbeSetData as T129 on T129.Id = ProbeSetXRef.DataId and T129.StrainId=129
+        left join ProbeSetData as T130 on T130.Id = ProbeSetXRef.DataId and T130.StrainId=130
+        left join ProbeSetData as T131 on T131.Id = ProbeSetXRef.DataId and T131.StrainId=131
+        left join ProbeSetData as T132 on T132.Id = ProbeSetXRef.DataId and T132.StrainId=132
+        left join ProbeSetData as T134 on T134.Id = ProbeSetXRef.DataId and T134.StrainId=134
+        left join ProbeSetData as T135 on T135.Id = ProbeSetXRef.DataId and T135.StrainId=135
+        left join ProbeSetData as T138 on T138.Id = ProbeSetXRef.DataId and T138.StrainId=138
+        left join ProbeSetData as T139 on T139.Id = ProbeSetXRef.DataId and T139.StrainId=139
+        left join ProbeSetData as T140 on T140.Id = ProbeSetXRef.DataId and T140.StrainId=140
+        left join ProbeSetData as T141 on T141.Id = ProbeSetXRef.DataId and T141.StrainId=141
+        left join ProbeSetData as T142 on T142.Id = ProbeSetXRef.DataId and T142.StrainId=142
+        left join ProbeSetData as T144 on T144.Id = ProbeSetXRef.DataId and T144.StrainId=144
+        left join ProbeSetData as T145 on T145.Id = ProbeSetXRef.DataId and T145.StrainId=145
+        left join ProbeSetData as T147 on T147.Id = ProbeSetXRef.DataId and T147.StrainId=147
+        left join ProbeSetData as T148 on T148.Id = ProbeSetXRef.DataId and T148.StrainId=148
+        left join ProbeSetData as T149 on T149.Id = ProbeSetXRef.DataId and T149.StrainId=149
+        left join ProbeSetData as T487 on T487.Id = ProbeSetXRef.DataId and T487.StrainId=487
+        left join ProbeSetData as T919 on T919.Id = ProbeSetXRef.DataId and T919.StrainId=919
+        left join ProbeSetData as T920 on T920.Id = ProbeSetXRef.DataId and T920.StrainId=920
+        left join ProbeSetData as T922 on T922.Id = ProbeSetXRef.DataId and T922.StrainId=922
+        WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and
+        ProbeSetFreeze.Name = 'HC_M2_0606_P' and
+        ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id
+        """
+
+
     def process_samples(self, start_vars, sample_names, excluded_samples):
         for sample in sample_names:
             if sample not in excluded_samples:
@@ -765,7 +791,7 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php');
                 if value.strip().lower() != 'x':
                     self.samples.append(str(sample))
                     self.vals.append(float(value))
-                    self.variances.append(variance)    
+                    #self.variances.append(variance)    
 
     def getSortByValue(self, calculationMethod):
 
@@ -942,32 +968,32 @@ Resorting this table <br>
                 query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, )
             #XZ, 09/20/2008: extract literature correlation value together with gene expression values.
             #XZ, 09/20/2008: notice the difference between the code in next block.
-            elif tempTable:
-                # we can get a little performance out of selecting our LitCorr here
-                # but also we need to do this because we are unconcerned with probes that have no geneId associated with them
-                # as we would not have litCorr data.
-
-                if method == "3":
-                    query = "SELECT %s.Name, %s.value," %  (db.type,tempTable)
-                    dataStartPos = 2
-                if method == "4" or method == "5":
-                    query = "SELECT %s.Name, %s.Correlation, %s.PValue," %  (db.type,tempTable, tempTable)
-                    dataStartPos = 3
-
-                query += string.join(temp,', ')
-                query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
-                if method == "3":
-                    query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable)
-                if method == "4" or method == "5":
-                    query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
-                #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
-                for item in StrainIdstep:
-                    query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item)
-
-                if method == "3":
-                    query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
-                if method == "4" or method == "5":
-                    query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
+            #elif tempTable:
+            #    # we can get a little performance out of selecting our LitCorr here
+            #    # but also we need to do this because we are unconcerned with probes that have no geneId associated with them
+            #    # as we would not have litCorr data.
+            #
+            #    if method == "3":
+            #        query = "SELECT %s.Name, %s.value," %  (db.type,tempTable)
+            #        dataStartPos = 2
+            #    if method == "4" or method == "5":
+            #        query = "SELECT %s.Name, %s.Correlation, %s.PValue," %  (db.type,tempTable, tempTable)
+            #        dataStartPos = 3
+            #
+            #    query += string.join(temp,', ')
+            #    query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
+            #    if method == "3":
+            #        query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable)
+            #    if method == "4" or method == "5":
+            #        query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
+            #    #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
+            #    for item in StrainIdstep:
+            #        query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item)
+            #
+            #    if method == "3":
+            #        query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
+            #    if method == "4" or method == "5":
+            #        query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
             else:
                 query = "SELECT %s.Name," %  db.type
                 dataStartPos = 1
@@ -1258,11 +1284,14 @@ Resorting this table <br>
             return traits, new_vals
 
         else:
-            #_log.info("Using the slow method for correlation")
-            #
-            #_log.info("Fetching from database")
-            traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
-            #_log.info("Done fetching from database")
+            traits = self.fetchAllDatabaseData(species=self.dataset.species,
+                                               GeneId=self.gene_id,
+                                               GeneSymbol=self.trait.symbol,
+                                               strains=self.sample_names,
+                                               db=self.db,
+                                               method=self.method,
+                                               returnNumber=self.returnNumber,
+                                               tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
             totalTraits = len(traits) #XZ, 09/18/2008: total trait number
 
         return traits
@@ -1424,7 +1453,6 @@ Resorting this table <br>
 
         return trait_list
 
-
     def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None):
 
         symbolCorrDict = {}
@@ -2104,3 +2132,31 @@ Resorting this table <br>
 
         return tblobj_body, worksheet, corrScript
 
+
+def calCorrelation(values_1, values_2):
+    N = Math.min(len(values_1), len(values_2))
+    X = []
+    Y = []
+    for i in range(N):
+        if values_1[i]!= None and values_2[i]!= None:
+            X.append(values_1[i])
+            Y.append(values_2[i])
+    NN = len(X)
+    if NN <6:
+        return (0.0,NN)
+    sx = reduce(lambda x,y:x+y,X,0.0)
+    sy = reduce(lambda x,y:x+y,Y,0.0)
+    x_mean = sx/NN
+    y_mean = sy/NN
+    xyd = 0.0
+    sxd = 0.0
+    syd = 0.0
+    for i in range(NN):
+        xyd += (X[i] - x_mean)*(Y[i] - y_mean)
+        sxd += (X[i] - x_mean)*(X[i] - x_mean)
+        syd += (Y[i] - y_mean)*(Y[i] - y_mean)
+    try:
+        corr = xyd/(sqrt(sxd)*sqrt(syd))
+    except:
+        corr = 0
+    return (corr, NN)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait.coffee b/wqflask/wqflask/static/new/javascript/show_trait.coffee
index eb87cf04..0f16ac68 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait.coffee
@@ -207,7 +207,6 @@ $ ->
 
     ##Calculate Correlations Code
 
-
     on_corr_method_change = ->
         console.log("in beginning of on_corr_method_change")
         corr_method = $('select[name=corr_method]').val()
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
index 03f872ca..3c995441 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
@@ -14,12 +14,6 @@ $ ->
         $("#trait_data_form").attr("action", url);
         $("#trait_data_form").submit()
 
-    #update_time_remaining = (percent_complete) ->
-    #    seconds_remaining = 1 / (percent_complete - root.previous_percent) * (100 - percent_complete)
-    #    minutes_remaining = seconds_remaining  / 60
-    #    $('#time_remaining').text(Math.round(minutes_remaining) + " minutes remaining")
-    #    root.previous_percent = percent_complete
-        
     update_time_remaining = (percent_complete) ->
         now = new Date()
         period = now.getTime() - root.start_time
@@ -33,8 +27,6 @@ $ ->
                 $('#time_remaining').text(Math.round(total_seconds_remaining) + " seconds remaining")
             else
                 $('#time_remaining').text(minutes_remaining + " minutes remaining")
-        #else
-        #    $('#time_remaining').text("period too small")
 
     get_progress = ->
         console.log("temp_uuid:", $("#temp_uuid").val())
@@ -92,7 +84,6 @@ $ ->
         )
         console.log("settingInterval")
 
-        #root.start_time = new Date().getTime()
         this.my_timer = setInterval(get_progress, 1000)
         return false
     )
diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html
index 09172705..0cc1c353 100644
--- a/wqflask/wqflask/templates/index_page.html
+++ b/wqflask/wqflask/templates/index_page.html
@@ -9,6 +9,7 @@
         <div class="container">
             <h1>GeneNetwork</h1>
             <p class="lead">Open source bioinformatics for systems genetics</p>
+            <p>- Lei Yan</p>
         </div>
     </header>
 
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 56887d5c..799245c3 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -33,7 +33,7 @@
             </div>
             
             {% include 'show_trait_details.html' %}
-            {% include 'show_trait_statistics.html' %}
+        {#  {% include 'show_trait_statistics.html' %}    #}
             {% include 'show_trait_calculate_correlations.html' %}
             {% include 'show_trait_mapping_tools.html' %}
             {% include 'show_trait_edit_data.html' %}
diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index 543afadd..12a064c0 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -1,130 +1,119 @@
-                        <p class="sectionheader" id="title3" style="border-radius: 5px;">&nbsp;&nbsp;Calculate Correlations</p>
+<div>
+    <h2>Calculate Correlations</h2>
+    <div class="well form-horizontal">
+        
+        <div class="control-group">{# Correlation method picker; the select carries an id so the label's for= has a target. #}
+            <label for="corr_method" class="control-label">Method</label>
+            <div class="controls">
+                <select name="corr_method" id="corr_method">
+                    <option value="sample">Sample r</option>
+                    <option value="lit">Literature r</option>
+                    <option value="tissue">Tissue r</option>
+                </select>
+            </div>
+        </div>
+        
+        <div class="control-group">{# Target dataset picker, grouped by tissue when a tissue name is present. #}
+            <label for="corr_dataset" class="control-label">Database</label>
+            <div class="controls">
+                <select name="corr_dataset" id="corr_dataset">
+                    {% for tissue in corr_tools.dataset_menu %}
+                        {% if tissue.tissue %}
+                            <optgroup label="{{ tissue.tissue }} ------">
+                        {% endif %}
+                        {% for dataset in tissue.datasets %}
+                            <option value="{{ dataset[1] }}"
+                            {% if corr_tools.dataset_menu_selected == dataset[1] %}
+                                selected
+                            {% endif %}>
+                                {{ dataset[0] }}
+                            </option>
+                        {% endfor %}
+                        {% if tissue.tissue %}
+                            </optgroup>
+                        {% endif %}
+                    {% endfor %}
+                </select>
+            </div>
+        </div>
+        
+        <div class="control-group">{# How many top results to return; options come from corr_tools.return_results_menu. #}
+            <label for="corr_return_results" class="control-label">Return</label>
+            <div class="controls">
+                <select name="corr_return_results" id="corr_return_results">
+                    {% for return_result in corr_tools.return_results_menu %}
+                        <option value="{{ return_result }}"
+                        {% if corr_tools.return_results_menu_selected == return_result %}
+                            selected
+                        {% endif %}>
+                        Top {{ return_result }}
+                        </option>
+                    {% endfor %}
+                </select>
+            </div>
+        </div>
+        
+        <div class="control-group">{# Sample group picker; one option per entry of sample_group_types. #}
+            <label for="corr_samples_group" class="control-label">Samples</label>
+            <div class="controls">
+                <select name="corr_samples_group" id="corr_samples_group">
+                    {% for group, pretty_group in sample_group_types.items() %}
+                        <option value="{{ group }}">{{ pretty_group }}</option>
+                    {% endfor %}
+                </select>
+            </div>
+        </div>
+            
+        <div class="control-group">{# Pearson vs. Spearman choice; the select carries an id so the label's for= has a target. #}
+            <label for="corr_sample_method" class="control-label">Type</label>
+            <div class="controls">
+                <select name="corr_sample_method" id="corr_sample_method">
+                    <option value="pearson">Pearson</option>
+                    <option value="spearman">Spearman Rank</option>
+                </select>
+            </div>
+        </div>
+        
+        <div class="control-group">
+            <div class="controls">
+                <button class="btn btn-inverse submit_special"
+                        data-url="/corr_compute"
+                        title="Compute Correlation">
+                    <i class="icon-ok-circle icon-white"></i> Compute
+                </button>
+            </div>
+        </div>
 
-                        <p id="sectionbody3"></p>
+        <span id="sample_r_desc" class="correlation_desc fs12">
+            The <a href="/correlationAnnotation.html#sample_r" target="_blank">Sample Correlation</a>
+            is computed
+            between trait data and any<br>
+            other traits in the sample database selected above. Use
+            <a href="/glossary.html#Correlations" target="_blank">Spearman
+            Rank</a><br>
+            when the sample size is small (&lt;20) or when there are influential outliers.
+        </span>
+        <SPAN id="lit_r_desc" style="display: none;" class="correlation_desc fs12">
+            The <A HREF="/correlationAnnotation.html" TARGET="_blank">Literature Correlation</A>
+            (Lit r) between
+            this gene and all other genes is computed<BR>
+            using the <A HREF="https://grits.eecs.utk.edu/sgo/sgo.html" TARGET="_blank">
+            Semantic Gene Organizer</A>
+            and human, rat, and mouse data from PubMed. <BR>
+            Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<BR><BR>
+            <A HREF="/glossary.html#Literature" TARGET="_blank">More on using Lit r</A>
+        </SPAN>
+        <SPAN id="tissue_r_desc" style="display: none;" class="correlation_desc fs12">
+            The <A HREF="/webqtl/main.py?FormID=tissueCorrelation" TARGET="_blank">Tissue Correlation</A>
+            (Tissue r)
+            estimates the similarity of expression of two genes<BR>
+            or transcripts across different cells, tissues, or organs
+            (<A HREF="/correlationAnnotation.html#tissue_r" TARGET="_blank">glossary</A>).
+            Tissue correlations<BR>
+            are generated by analyzing expression in multiple samples usually taken from single cases.<BR>
+            <STRONG>Pearson</STRONG> and <STRONG>Spearman Rank</STRONG> correlations have been
+            computed for all pairs of genes<BR> using data from mouse samples.<BR>
+        </SPAN>
 
-                        <table class="target4" cellpadding="0" cellspacing="0" width="100%">
-                          <tr>
-                            <td>
-                              <div class="ui-tabs" id="corr_tabs">
-                                <div id="corrtabs-1">
-                                  <table cellpadding="0" cellspacing="0" width="100%">
-                                    <tr>
-                                      <td>
-                                        <input type="hidden" name="orderBy" value="2">
-
-                                        <table cellpadding="2" cellspacing="0" width="619px">
-                                          <tr>
-                                            <td><span class="ff1 fwb fs12">Method:</span></td>
-                                            <td colspan="3">
-                                            <select name="corr_method" size="1">
-                                                <option value="sample">Sample r</option>
-                                                <option value="lit">Literature r</option>
-                                                <option value="tissue">Tissue r</option>
-                                            </select>
-                                            </td>
-                                          </tr>
-                                          <tr>
-                                            <td><span class="ffl fwb fs12">Database:</span></td>
-                                            <td colspan="3">
-                                            <select name="corr_dataset" size="1">
-                                            {% for tissue in corr_tools.dataset_menu %}
-                                                {% if tissue.tissue %}
-                                                    <optgroup label="{{ tissue.tissue }} ------">
-                                                {% endif %}
-                                                {% for dataset in tissue.datasets %}
-                                                    <option value="{{ dataset[1] }}"
-                                                    {% if corr_tools.dataset_menu_selected == dataset[1] %}
-                                                        selected
-                                                    {% endif %}>
-                                                        {{ dataset[0] }}
-                                                    </option>
-                                                {% endfor %}
-                                                {% if tissue.tissue %}
-                                                    </optgroup>
-                                                {% endif %}
-                                            {% endfor %}
-                                            </select>
-                                            </td>
-                                          </tr>
-
-
-                                          <tr>
-                                            <td><span class="ffl fwb fs12">Return:</span></td>
-
-                                            <td><select name="corr_return_results" size="1">
-                                            {% for return_result in corr_tools.return_results_menu %}
-                                                <option value="{{ return_result }}"
-                                                {% if corr_tools.return_results_menu_selected == return_result %}
-                                                    selected
-                                                {% endif %}>
-                                                Top {{ return_result }}
-                                                </option>
-                                            {% endfor %}
-                                            </select></td>
-                                          </tr>
-
-
-                                          <tr class="mdp1">
-                                            <td><span class="ffl fwb fs12">Samples:</span></td>
-                                            <td>
-                                                <select name="corr_samples_group" size="1">
-                                                {% for group, pretty_group in sample_group_types.items() %}
-                                                    <option value="{{ group }}">
-                                                        {{ pretty_group }}
-                                                    </option>
-                                                {% endfor %}
-                                                </select>
-                                            </td>
-                                          </tr>
-
-                                        </table>
-                                        <br>
-                                        <div id="corr_sample_method_options">
-                                            Pearson <input type="radio" name="corr_sample_method" value="pearson" checked>
-                                            &nbsp;&nbsp;&nbsp;
-                                            Spearman Rank <input type="radio" name="corr_sample_method" value="spearman">
-                                        </div>
-                                        <br>
-
-                                        <input type="submit" name="corr_compute" id="corr_compute" class="btn" value="Compute"><br><br>
-
-                                        <span id="sample_r_desc" class="correlation_desc fs12">
-                                            The <a href="/correlationAnnotation.html#sample_r" target="_blank">Sample Correlation</a>
-                                            is computed
-                                            between trait data and any<br>
-                                            other traits in the sample database selected above. Use
-                                            <a href="/glossary.html#Correlations" target="_blank">Spearman
-                                            Rank</a><br>
-                                            when the sample size is small (&lt;20) or when there are influential outliers.
-                                        </span>
-                                        <SPAN id="lit_r_desc" style="display: none;" class="correlation_desc fs12">
-                                            The <A HREF="/correlationAnnotation.html" TARGET="_blank">Literature Correlation</A>
-                                            (Lit r) between
-                                            this gene and all other genes is computed<BR>
-                                            using the <A HREF="https://grits.eecs.utk.edu/sgo/sgo.html" TARGET="_blank">
-                                            Semantic Gene Organizer</A>
-                                            and human, rat, and mouse data from PubMed. <BR>
-                                            Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<BR><BR>
-                                            <A HREF="/glossary.html#Literature" TARGET="_blank">More on using Lit r</A>
-                                        </SPAN>
-                                        <SPAN id="tissue_r_desc" style="display: none;" class="correlation_desc fs12">
-                                            The <A HREF="/webqtl/main.py?FormID=tissueCorrelation" TARGET="_blank">Tissue Correlation</A>
-                                            (Tissue r)
-                                            estimates the similarity of expression of two genes<BR>
-                                            or transcripts across different cells, tissues, or organs
-                                            (<A HREF="/correlationAnnotation.html#tissue_r" TARGET="_blank">glossary</A>).
-                                            Tissue correlations<BR>
-                                            are generated by analyzing expression in multiple samples usually taken from single cases.<BR>
-                                            <STRONG>Pearson</STRONG> and <STRONG>Spearman Rank</STRONG> correlations have been
-                                            computed for all pairs of genes<BR> using data from mouse samples.<BR>
-                                        </SPAN>
-
-                                        <br>
-                                      </td>
-                                    </tr>
-                                  </table>
-                                </div>
-                              </div>
-                            </td>
-                          </tr>
-                        </table>
+    </div>
+</div>
\ No newline at end of file
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 7a504c54..8531561a 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -210,7 +210,7 @@ def marker_regression_page():
 
 @app.route("/corr_compute", methods=('POST',))
 def corr_compute_page():
-    print("In corr_compute, request.args is:", pf(request.form))
+    print("In corr_compute, request.form is:", pf(request.form))
     #fd = webqtlFormData.webqtlFormData(request.form)
     template_vars = show_corr_results.CorrelationResults(request.form)
     return render_template("correlation_page.html", **template_vars.__dict__)
author	Lei Yan	2013-05-08 22:16:26 +0000
committer	Lei Yan	2013-05-08 22:16:26 +0000
commit	b4371ef0d96605187b7474e7e4844dbebab67d8b (patch)
tree	14b68ddaa9916494551c4306c63b1a7719f7d993 /wqflask
parent	34312cfacdcb5af450d33ac3b54d7c01a6a61788 (diff)
download	genenetwork2-b4371ef0d96605187b7474e7e4844dbebab67d8b.tar.gz