about | summary | refs | log | tree | commit | diff
path: root/wqflask
diff options
context:
space:
mode:
author Lei Yan 2013-05-08 22:16:26 +0000
committer Lei Yan 2013-05-08 22:16:26 +0000
commit b4371ef0d96605187b7474e7e4844dbebab67d8b (patch)
tree 14b68ddaa9916494551c4306c63b1a7719f7d993 /wqflask
parent 34312cfacdcb5af450d33ac3b54d7c01a6a61788 (diff)
download genenetwork2-b4371ef0d96605187b7474e7e4844dbebab67d8b.tar.gz
Worked on correlation page
Wrote function in dataset.py that gets all the traits in a dataset and their sample values
Diffstat (limited to 'wqflask')
-rwxr-xr-x wqflask/base/data_set.py 161
-rwxr-xr-x wqflask/base/trait.py 2
-rw-r--r-- wqflask/other_config/wqflask-nginx.conf 45
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 192
-rw-r--r-- wqflask/wqflask/static/new/javascript/show_trait.coffee 1
-rw-r--r-- wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee 9
-rw-r--r-- wqflask/wqflask/templates/index_page.html 1
-rw-r--r-- wqflask/wqflask/templates/show_trait.html 2
-rw-r--r-- wqflask/wqflask/templates/show_trait_calculate_correlations.html 245
-rw-r--r-- wqflask/wqflask/views.py 2
10 files changed, 431 insertions, 229 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 111597a9..1520b180 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -23,6 +23,8 @@
from __future__ import absolute_import, print_function, division
import os
import math
+import string
+import collections
import json
import itertools
@@ -49,23 +51,29 @@ def create_dataset(dataset_name):
query = """
SELECT DBType.Name
FROM DBList, DBType
- WHERE DBList.Name = '%s' and
+ WHERE DBList.Name = '{}' and
DBType.Id = DBList.DBTypeId
- """ % (escape(dataset_name))
- print("query is: ", pf(query))
+ """.format(escape(dataset_name))
+ #print("query is: ", pf(query))
dataset_type = g.db.execute(query).fetchone().Name
#dataset_type = cursor.fetchone()[0]
- print("[blubber] dataset_type:", pf(dataset_type))
+ #print("[blubber] dataset_type:", pf(dataset_type))
dataset_ob = DS_NAME_MAP[dataset_type]
#dataset_class = getattr(data_set, dataset_ob)
- print("dataset_ob:", dataset_ob)
- print("DS_NAME_MAP:", pf(DS_NAME_MAP))
+ #print("dataset_ob:", dataset_ob)
+ #print("DS_NAME_MAP:", pf(DS_NAME_MAP))
dataset_class = globals()[dataset_ob]
return dataset_class(dataset_name)
+def mescape(*items):
+ """Multiple escape"""
+ escaped = [escape(item) for item in items]
+ print("escaped is:", escaped)
+ return escaped
+
class Markers(object):
"""Todo: Build in cacheing so it saves us reading the same file more than once"""
@@ -74,15 +82,21 @@ class Markers(object):
self.markers = json.load(json_data_fh)
def add_pvalues(self, p_values):
+ print("length of self.markers:", len(self.markers))
+ print("length of p_values:", len(p_values))
+
# THIS IS only needed for the case when we are limiting the number of p-values calculated
- if len(self.markers) > len(p_values):
+ if len(self.markers) < len(p_values):
self.markers = self.markers[:len(p_values)]
for marker, p_value in itertools.izip(self.markers, p_values):
marker['p_value'] = p_value
+ print("p_value is:", marker['p_value'])
marker['lod_score'] = -math.log10(marker['p_value'])
#Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+
+
class HumanMarkers(Markers):
@@ -93,9 +107,9 @@ class HumanMarkers(Markers):
for line in marker_data_fh:
splat = line.strip().split()
marker = {}
- marker['chr'] = splat[0]
+ marker['chr'] = int(splat[0])
marker['name'] = splat[1]
- marker['Mb'] = str(float(splat[3]) / 1000000)
+ marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
#print("markers is: ", pf(self.markers))
@@ -116,8 +130,7 @@ class HumanMarkers(Markers):
with Bench("deleting markers"):
markers = []
for marker in self.markers:
- #if not float(marker['Mb']) <= 0 or not float(marker['chr']) == 0:
- if float(marker['Mb']) > 0 and marker['chr'] != "0":
+ if not marker['Mb'] <= 0 and not marker['chr'] == 0:
markers.append(marker)
self.markers = markers
@@ -349,6 +362,19 @@ class PhenotypeDataSet(DataSet):
# (Urgently?) Need to write this
pass
+ def get_trait_list(self):
+ query = """
+ select PublishXRef.Id
+ from PublishXRef, PublishFreeze
+ where PublishFreeze.InbredSetId=PublishXRef.InbredSetId
+ and PublishFreeze.Id = {}
+ """.format(escape(str(self.id)))
+ results = g.db.execute(query).fetchall()
+ trait_data = {}
+ for trait in results:
+ trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+ return trait_data
+
def get_trait_info(self, trait_list, species = ''):
for this_trait in trait_list:
if not this_trait.haveinfo:
@@ -359,9 +385,7 @@ class PhenotypeDataSet(DataSet):
continue # for now
if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
description = this_trait.pre_publication_description
- this_trait.description_display = description.decode('utf-8')
-
-
+ this_trait.description_display = description
if not this_trait.year.isdigit():
this_trait.pubmed_text = "N/A"
@@ -419,7 +443,7 @@ class PhenotypeDataSet(DataSet):
PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (trait.name, self.id)
+ """ % (trait, self.id)
results = g.db.execute(query).fetchall()
return results
@@ -459,6 +483,19 @@ class GenotypeDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
+
+ def get_trait_list(self):
+ query = """
+ select Geno.Name
+ from Geno, GenoXRef
+ where GenoXRef.GenoId = Geno.Id
+ and GenoFreezeId = {}
+ """.format(escape(str(self.id)))
+ results = g.db.execute(query).fetchall()
+ trait_data = {}
+ for trait in results:
+ trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+ return trait_data
def get_trait_info(self, trait_list, species=None):
for this_trait in trait_list:
@@ -497,7 +534,7 @@ class GenotypeDataSet(DataSet):
GenoData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait.name, self.name)
+ """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)
results = g.db.execute(query).fetchall()
return results
@@ -569,7 +606,95 @@ class MrnaAssayDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
+
+ def get_trait_list_1(self):
+ query = """
+ select ProbeSet.Name
+ from ProbeSet, ProbeSetXRef
+ where ProbeSetXRef.ProbeSetId = ProbeSet.Id
+ and ProbeSetFreezeId = {}
+ """.format(escape(str(self.id)))
+ results = g.db.execute(query).fetchall()
+ print("After get_trait_list query")
+ trait_data = {}
+ for trait in results:
+ print("Retrieving sample_data for ", trait[0])
+ trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+ print("After retrieve_sample_data")
+ return trait_data
+
+ def get_trait_data(self):
+ sample_ids = []
+ for sample in self.group.samplelist:
+ query = """
+ SELECT Strain.Id FROM Strain, Species
+ WHERE Strain.Name = '{}'
+ and Strain.SpeciesId=Species.Id
+ and Species.name = '{}'
+ """.format(*mescape(sample, self.group.species))
+ this_id = g.db.execute(query).fetchone()[0]
+ sample_ids.append('%d' % this_id)
+ print("sample_ids size: ", len(sample_ids))
+
+ # MySQL limits the number of tables that can be used in a join to 61,
+ # so we break the sample ids into smaller chunks
+ chunk_count = 50
+ n = len(sample_ids) / chunk_count
+ if len(sample_ids) % chunk_count:
+ n += 1
+ print("n: ", n)
+ #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
+ #tempTable = None
+ #if GeneId and db.type == "ProbeSet":
+ # if method == "3":
+ # tempTable = self.getTempLiteratureTable(species=species,
+ # input_species_geneid=GeneId,
+ # returnNumber=returnNumber)
+ #
+ # if method == "4" or method == "5":
+ # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
+ # TissueProbeSetFreezeId=tissueProbeSetFreezeId,
+ # method=method,
+ # returnNumber=returnNumber)
+ trait_sample_data = []
+ for step in range(int(n)):
+ temp = []
+ sample_ids_step = sample_ids[step*chunk_count:min(len(sample_ids), (step+1)*chunk_count)]
+ for item in sample_ids_step:
+ temp.append('T%s.value' % item)
+ query = "SELECT {}.Name,".format(escape(self.type))
+ data_start_pos = 1
+ query += string.join(temp, ', ')
+ query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
+ self.type,
+ self.type))
+ #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
+ for item in sample_ids_step:
+ query += """
+ left join {}Data as T{} on T{}.Id = {}XRef.DataId
+ and T{}.StrainId={}\n
+ """.format(*mescape(self.type, item, item, self.type, item, item))
+ query += """
+ WHERE {}XRef.{}FreezeId = {}Freeze.Id
+ and {}Freeze.Name = '{}'
+ and {}.Id = {}XRef.{}Id
+ order by {}.Id
+ """.format(*mescape(self.type, self.type, self.type, self.type,
+ self.name, self.type, self.type, self.type, self.type))
+ print("query: ", query)
+ results = g.db.execute(query).fetchall()
+ trait_sample_data.append(results)
+
+ trait_count = len(trait_sample_data[0])
+ self.trait_data = collections.defaultdict(list)
+ # put all of the separate data together into a dictionary where the keys are
+ # trait names and values are lists of sample values
+ for j in range(trait_count):
+ trait_name = trait_sample_data[0][j][0]
+ for i in range(int(n)):
+ self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:]
+
def get_trait_info(self, trait_list=None, species=''):
# Note: setting trait_list to [] is probably not a great idea.
@@ -693,9 +818,9 @@ class MrnaAssayDataSet(DataSet):
ProbeSetFreeze.Name = %s
""" % (escape(self.name), escape(self.dataset.name))
results = g.db.execute(query).fetchone()
-
return results[0]
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -712,7 +837,7 @@ class MrnaAssayDataSet(DataSet):
ProbeSetData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (escape(trait.name), escape(self.name))
+ """ % (escape(trait), escape(self.name))
results = g.db.execute(query).fetchall()
return results
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 2af4bc24..7c1c035c 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -208,7 +208,7 @@ class GeneralTrait:
# ''' % (self.cellid, self.name, self.dataset.name)
#
#else:
- results = self.dataset.retrieve_sample_data(self)
+ results = self.dataset.retrieve_sample_data(self.name)
# Todo: is this necessary? If not remove
self.data.clear()
diff --git a/wqflask/other_config/wqflask-nginx.conf b/wqflask/other_config/wqflask-nginx.conf
index 50f9d73c..4e6fd0d9 100644
--- a/wqflask/other_config/wqflask-nginx.conf
+++ b/wqflask/other_config/wqflask-nginx.conf
@@ -2,7 +2,7 @@ server {
# Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/
listen 80;
- server_name _;
+ server_name gn2python.genenetwork.org;
access_log /var/log/nginx/access.log;
error_log /var/log/nginx/error.log;
@@ -37,6 +37,47 @@ server {
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_read_timeout 40m;
- }
+ }
+}
+
+server {
+ # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/
+ listen 80;
+
+ server_name gn2python_lei.genenetwork.org;
+
+ access_log /var/log/nginx/lei_access.log;
+ error_log /var/log/nginx/lei_error.log;
+
+ location ^~ /css/ {
+ root /gene/wqflask/wqflask/static/;
+ }
+
+ location ^~ /javascript/ {
+ root /gene/wqflask/wqflask/static/;
+ }
+
+# location ^~ /image/ {
+# root /gene/wqflask/wqflask/static/;
+# }
+
+ location ^~ /images/ {
+ root /gene/wqflask/wqflask/static/;
+ }
+
+ ### New - added by Sam
+ #location ^~ /static/ {
+ # root /gene/wqflask/wqflask/static/;
+ #}
+ location / {
+ proxy_pass http://127.0.0.1:5001/;
+ proxy_redirect off;
+
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+
+ proxy_read_timeout 40m;
+ }
}
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 96298b37..1d0368cc 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -54,6 +54,7 @@ from dbFunction import webqtlDatabaseFunction
import utility.webqtlUtil #this is for parallel computing only.
from wqflask.correlation import correlationFunction
+from pprint import pformat as pf
METHOD_SAMPLE_PEARSON = "1"
METHOD_SAMPLE_RANK = "2"
@@ -92,7 +93,8 @@ class Trait(object):
def calculate_correlation(self, values, method):
"""Calculate the correlation value and p value according to the method specified"""
- #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for
+ #ZS: This takes the list of values of the trait our selected trait is being correlated
+ #against and removes the values of the samples our trait has no value for
#There's probably a better way of dealing with this, but I'll have to ask Christian
updated_raw_values = []
updated_values = []
@@ -276,57 +278,48 @@ class CorrelationResults(object):
# return templatePage.error(heading = heading, detail = [message], error=error)
def __init__(self, start_vars):
- #self.dataset = create_dataset(start_vars['dataset_name'])
- #self.dataset.group.read_genotype_file()
- #self.genotype = self.dataset.group.genotype
- #
+ # get trait list from db (database name)
+ # calculate correlation with Base vector and targets
+
#self.this_trait = GeneralTrait(dataset=self.dataset.name,
# name=start_vars['trait_id'],
# cellid=None)
- helper_functions.get_dataset_and_trait(self, start_vars)
+ print("start_vars: ", pf(start_vars))
+
+ helper_functions.get_species_dataset_trait(self, start_vars)
+ self.dataset.group.read_genotype_file()
self.samples = [] # Want only ones with values
self.vals = []
- self.variances = []
corr_samples_group = start_vars['corr_samples_group']
+
+ #The two if statements below append samples to the sample list based upon whether the user
+ #selected Primary Samples Only, Other Samples Only, or All Samples
+
+ #If either BXD/whatever Only or All Samples, append all of that group's samplelist
if corr_samples_group != 'samples_other':
self.process_samples(start_vars, self.dataset.group.samplelist, ())
- #for sample in self.dataset.group.samplelist:
- # value = start_vars['value:' + sample]
- # variance = start_vars['variance:' + sample]
- # if variance.strip().lower() == 'x':
- # variance = 0
- # else:
- # variance = float(variance)
- # if value.strip().lower() != 'x':
- # self.samples.append(str(sample))
- # self.vals.append(float(value))
- # self.variances.append(variance)
+ #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
+ #exclude the primary samples (because they would have been added in the previous
+ #if statement if the user selected All Samples)
if corr_samples_group != 'samples_primary':
primary_samples = (self.dataset.group.parlist +
self.dataset.group.f1list +
self.dataset.group.samplelist)
self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
- #for sample in self.this_trait.data.keys():
- # if sample not in primary_samples:
- # value = start_vars['value:' + sample]
- # variance = start_vars['variance:' + sample]
- # if variance.strip().lower() == 'x':
- # variance = 0
- # else:
- # variance = float(variance)
- # if value.strip().lower() != 'x':
- # self.samples.append(str(sample))
- # self.vals.append(float(value))
- # self.variances.append(variance)
-
- print("self.samples is:", pf(self.samples))
-
- #sample_list = get_sample_data(fd)
- #print("sample_list is", pf(sample_list))
+
+ #for i, sample in enumerate(self.samples):
+ # print("{} : {}".format(sample, self.vals[i]))
+
+ self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
+ self.target_dataset.get_trait_data()
+ print("trait_list: {}".format(pf(self.target_dataset.trait_data)))
+ # Lei Yan todo
+ for trait, values in self.target_dataset.trait_data.iteritems():
+ correlation = calCorrelation(values, )
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
@@ -753,6 +746,39 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php');
else:
self.dict['body'] = ""
+ def get_all_dataset_data(self):
+
+ """
+ SELECT ProbeSet.Name, T128.value, T129.value, T130.value, T131.value, T132.value, T134.value, T135.value, T138.value, T139.value, T140.value, T141.value, T142.value, T144
+ .value, T145.value, T147.value, T148.value, T149.value, T487.value, T919.value, T920.value, T922.value
+ FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ left join ProbeSetData as T128 on T128.Id = ProbeSetXRef.DataId and T128.StrainId=128
+ left join ProbeSetData as T129 on T129.Id = ProbeSetXRef.DataId and T129.StrainId=129
+ left join ProbeSetData as T130 on T130.Id = ProbeSetXRef.DataId and T130.StrainId=130
+ left join ProbeSetData as T131 on T131.Id = ProbeSetXRef.DataId and T131.StrainId=131
+ left join ProbeSetData as T132 on T132.Id = ProbeSetXRef.DataId and T132.StrainId=132
+ left join ProbeSetData as T134 on T134.Id = ProbeSetXRef.DataId and T134.StrainId=134
+ left join ProbeSetData as T135 on T135.Id = ProbeSetXRef.DataId and T135.StrainId=135
+ left join ProbeSetData as T138 on T138.Id = ProbeSetXRef.DataId and T138.StrainId=138
+ left join ProbeSetData as T139 on T139.Id = ProbeSetXRef.DataId and T139.StrainId=139
+ left join ProbeSetData as T140 on T140.Id = ProbeSetXRef.DataId and T140.StrainId=140
+ left join ProbeSetData as T141 on T141.Id = ProbeSetXRef.DataId and T141.StrainId=141
+ left join ProbeSetData as T142 on T142.Id = ProbeSetXRef.DataId and T142.StrainId=142
+ left join ProbeSetData as T144 on T144.Id = ProbeSetXRef.DataId and T144.StrainId=144
+ left join ProbeSetData as T145 on T145.Id = ProbeSetXRef.DataId and T145.StrainId=145
+ left join ProbeSetData as T147 on T147.Id = ProbeSetXRef.DataId and T147.StrainId=147
+ left join ProbeSetData as T148 on T148.Id = ProbeSetXRef.DataId and T148.StrainId=148
+ left join ProbeSetData as T149 on T149.Id = ProbeSetXRef.DataId and T149.StrainId=149
+ left join ProbeSetData as T487 on T487.Id = ProbeSetXRef.DataId and T487.StrainId=487
+ left join ProbeSetData as T919 on T919.Id = ProbeSetXRef.DataId and T919.StrainId=919
+ left join ProbeSetData as T920 on T920.Id = ProbeSetXRef.DataId and T920.StrainId=920
+ left join ProbeSetData as T922 on T922.Id = ProbeSetXRef.DataId and T922.StrainId=922
+ WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and
+ ProbeSetFreeze.Name = 'HC_M2_0606_P' and
+ ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id
+ """
+
+
def process_samples(self, start_vars, sample_names, excluded_samples):
for sample in sample_names:
if sample not in excluded_samples:
@@ -765,7 +791,7 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php');
if value.strip().lower() != 'x':
self.samples.append(str(sample))
self.vals.append(float(value))
- self.variances.append(variance)
+ #self.variances.append(variance)
def getSortByValue(self, calculationMethod):
@@ -942,32 +968,32 @@ Resorting this table <br>
query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, )
#XZ, 09/20/2008: extract literature correlation value together with gene expression values.
#XZ, 09/20/2008: notice the difference between the code in next block.
- elif tempTable:
- # we can get a little performance out of selecting our LitCorr here
- # but also we need to do this because we are unconcerned with probes that have no geneId associated with them
- # as we would not have litCorr data.
-
- if method == "3":
- query = "SELECT %s.Name, %s.value," % (db.type,tempTable)
- dataStartPos = 2
- if method == "4" or method == "5":
- query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable)
- dataStartPos = 3
-
- query += string.join(temp,', ')
- query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
- if method == "3":
- query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable)
- if method == "4" or method == "5":
- query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
- #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
- for item in StrainIdstep:
- query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item)
-
- if method == "3":
- query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
- if method == "4" or method == "5":
- query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
+ #elif tempTable:
+ # # we can get a little performance out of selecting our LitCorr here
+ # # but also we need to do this because we are unconcerned with probes that have no geneId associated with them
+ # # as we would not have litCorr data.
+ #
+ # if method == "3":
+ # query = "SELECT %s.Name, %s.value," % (db.type,tempTable)
+ # dataStartPos = 2
+ # if method == "4" or method == "5":
+ # query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable)
+ # dataStartPos = 3
+ #
+ # query += string.join(temp,', ')
+ # query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
+ # if method == "3":
+ # query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable)
+ # if method == "4" or method == "5":
+ # query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
+ # #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
+ # for item in StrainIdstep:
+ # query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item)
+ #
+ # if method == "3":
+ # query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
+ # if method == "4" or method == "5":
+ # query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
else:
query = "SELECT %s.Name," % db.type
dataStartPos = 1
@@ -1258,11 +1284,14 @@ Resorting this table <br>
return traits, new_vals
else:
- #_log.info("Using the slow method for correlation")
- #
- #_log.info("Fetching from database")
- traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
- #_log.info("Done fetching from database")
+ traits = self.fetchAllDatabaseData(species=self.dataset.species,
+ GeneId=self.gene_id,
+ GeneSymbol=self.trait.symbol,
+ strains=self.sample_names,
+ db=self.db,
+ method=self.method,
+ returnNumber=self.returnNumber,
+ tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
totalTraits = len(traits) #XZ, 09/18/2008: total trait number
return traits
@@ -1424,7 +1453,6 @@ Resorting this table <br>
return trait_list
-
def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None):
symbolCorrDict = {}
@@ -2104,3 +2132,31 @@ Resorting this table <br>
return tblobj_body, worksheet, corrScript
+
+def calCorrelation(values_1, values_2):
+ N = Math.min(len(values_1), len(values_2))
+ X = []
+ Y = []
+ for i in range(N):
+ if values_1[i]!= None and values_2[i]!= None:
+ X.append(values_1[i])
+ Y.append(values_2[i])
+ NN = len(X)
+ if NN <6:
+ return (0.0,NN)
+ sx = reduce(lambda x,y:x+y,X,0.0)
+ sy = reduce(lambda x,y:x+y,Y,0.0)
+ x_mean = sx/NN
+ y_mean = sy/NN
+ xyd = 0.0
+ sxd = 0.0
+ syd = 0.0
+ for i in range(NN):
+ xyd += (X[i] - x_mean)*(Y[i] - y_mean)
+ sxd += (X[i] - x_mean)*(X[i] - x_mean)
+ syd += (Y[i] - y_mean)*(Y[i] - y_mean)
+ try:
+ corr = xyd/(sqrt(sxd)*sqrt(syd))
+ except:
+ corr = 0
+ return (corr, NN)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait.coffee b/wqflask/wqflask/static/new/javascript/show_trait.coffee
index eb87cf04..0f16ac68 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait.coffee
@@ -207,7 +207,6 @@ $ ->
##Calculate Correlations Code
-
on_corr_method_change = ->
console.log("in beginning of on_corr_method_change")
corr_method = $('select[name=corr_method]').val()
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
index 03f872ca..3c995441 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
@@ -14,12 +14,6 @@ $ ->
$("#trait_data_form").attr("action", url);
$("#trait_data_form").submit()
- #update_time_remaining = (percent_complete) ->
- # seconds_remaining = 1 / (percent_complete - root.previous_percent) * (100 - percent_complete)
- # minutes_remaining = seconds_remaining / 60
- # $('#time_remaining').text(Math.round(minutes_remaining) + " minutes remaining")
- # root.previous_percent = percent_complete
-
update_time_remaining = (percent_complete) ->
now = new Date()
period = now.getTime() - root.start_time
@@ -33,8 +27,6 @@ $ ->
$('#time_remaining').text(Math.round(total_seconds_remaining) + " seconds remaining")
else
$('#time_remaining').text(minutes_remaining + " minutes remaining")
- #else
- # $('#time_remaining').text("period too small")
get_progress = ->
console.log("temp_uuid:", $("#temp_uuid").val())
@@ -92,7 +84,6 @@ $ ->
)
console.log("settingInterval")
- #root.start_time = new Date().getTime()
this.my_timer = setInterval(get_progress, 1000)
return false
)
diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html
index 09172705..0cc1c353 100644
--- a/wqflask/wqflask/templates/index_page.html
+++ b/wqflask/wqflask/templates/index_page.html
@@ -9,6 +9,7 @@
<div class="container">
<h1>GeneNetwork</h1>
<p class="lead">Open source bioinformatics for systems genetics</p>
+ <p>- Lei Yan</p>
</div>
</header>
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 56887d5c..799245c3 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -33,7 +33,7 @@
</div>
{% include 'show_trait_details.html' %}
- {% include 'show_trait_statistics.html' %}
+ {# {% include 'show_trait_statistics.html' %} #}
{% include 'show_trait_calculate_correlations.html' %}
{% include 'show_trait_mapping_tools.html' %}
{% include 'show_trait_edit_data.html' %}
diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index 543afadd..12a064c0 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -1,130 +1,119 @@
- <p class="sectionheader" id="title3" style="border-radius: 5px;">&nbsp;&nbsp;Calculate Correlations</p>
+<div>
+ <h2>Calculate Correlations</h2>
+ <div class="well form-horizontal">
+
+ <div class="control-group">
+ <label for="corr_method" class="control-label">Method</label>
+ <div class="controls">
+ <select name="corr_method">
+ <option value="sample">Sample r</option>
+ <option value="lit">Literature r</option>
+ <option value="tissue">Tissue r</option>
+ </select>
+ </div>
+ </div>
+
+ <div class="control-group">
+ <label for="corr_dataset" class="control-label">Database</label>
+ <div class="controls">
+ <select name="corr_dataset">
+ {% for tissue in corr_tools.dataset_menu %}
+ {% if tissue.tissue %}
+ <optgroup label="{{ tissue.tissue }} ------">
+ {% endif %}
+ {% for dataset in tissue.datasets %}
+ <option value="{{ dataset[1] }}"
+ {% if corr_tools.dataset_menu_selected == dataset[1] %}
+ selected
+ {% endif %}>
+ {{ dataset[0] }}
+ </option>
+ {% endfor %}
+ {% if tissue.tissue %}
+ </optgroup>
+ {% endif %}
+ {% endfor %}
+ </select>
+ </div>
+ </div>
+
+ <div class="control-group">
+ <label for="corr_return_results" class="control-label">Return</label>
+ <div class="controls">
+ <select name="corr_return_results">
+ {% for return_result in corr_tools.return_results_menu %}
+ <option value="{{ return_result }}"
+ {% if corr_tools.return_results_menu_selected == return_result %}
+ selected
+ {% endif %}>
+ Top {{ return_result }}
+ </option>
+ {% endfor %}
+ </select>
+ </div>
+ </div>
+
+ <div class="control-group">
+ <label for="corr_samples_group" class="control-label">Samples</label>
+ <div class="controls">
+ <select name="corr_samples_group">
+ {% for group, pretty_group in sample_group_types.items() %}
+ <option value="{{ group }}">{{ pretty_group }}</option>
+ {% endfor %}
+ </select>
+ </div>
+ </div>
+
+ <div class="control-group">
+ <label for="corr_sample_method" class="control-label">Type</label>
+ <div class="controls">
+ <select name="corr_sample_method">
+ <option value="pearson">Pearson</option>
+ <option value="spearman">Spearman Rank</option>
+ </select>
+ </div>
+ </div>
+
+ <div class="control-group">
+ <div class="controls">
+ <button class="btn btn-inverse submit_special"
+ data-url="/corr_compute"
+ title="Compute Correlation">
+ <i class="icon-ok-circle icon-white"></i> Compute
+ </button>
+ </div>
+ </div>
- <p id="sectionbody3"></p>
+ <span id="sample_r_desc" class="correlation_desc fs12">
+ The <a href="/correlationAnnotation.html#sample_r" target="_blank">Sample Correlation</a>
+ is computed
+ between trait data and any<br>
+ other traits in the sample database selected above. Use
+ <a href="/glossary.html#Correlations" target="_blank">Spearman
+ Rank</a><br>
+ when the sample size is small (&lt;20) or when there are influential outliers.
+ </span>
+ <SPAN id="lit_r_desc" style="display: none;" class="correlation_desc fs12">
+ The <A HREF="/correlationAnnotation.html" TARGET="_blank">Literature Correlation</A>
+ (Lit r) between
+ this gene and all other genes is computed<BR>
+ using the <A HREF="https://grits.eecs.utk.edu/sgo/sgo.html" TARGET="_blank">
+ Semantic Gene Organizer</A>
+ and human, rat, and mouse data from PubMed. <BR>
+ Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<BR><BR>
+ <A HREF="/glossary.html#Literature" TARGET="_blank">More on using Lit r</A>
+ </SPAN>
+ <SPAN id="tissue_r_desc" style="display: none;" class="correlation_desc fs12">
+ The <A HREF="/webqtl/main.py?FormID=tissueCorrelation" TARGET="_blank">Tissue Correlation</A>
+ (Tissue r)
+ estimates the similarity of expression of two genes<BR>
+ or transcripts across different cells, tissues, or organs
+ (<A HREF="/correlationAnnotation.html#tissue_r" TARGET="_blank">glossary</A>).
+ Tissue correlations<BR>
+ are generated by analyzing expression in multiple samples usually taken from single cases.<BR>
+ <STRONG>Pearson</STRONG> and <STRONG>Spearman Rank</STRONG> correlations have been
+ computed for all pairs of genes<BR> using data from mouse samples.<BR>
+ </SPAN>
- <table class="target4" cellpadding="0" cellspacing="0" width="100%">
- <tr>
- <td>
- <div class="ui-tabs" id="corr_tabs">
- <div id="corrtabs-1">
- <table cellpadding="0" cellspacing="0" width="100%">
- <tr>
- <td>
- <input type="hidden" name="orderBy" value="2">
-
- <table cellpadding="2" cellspacing="0" width="619px">
- <tr>
- <td><span class="ff1 fwb fs12">Method:</span></td>
- <td colspan="3">
- <select name="corr_method" size="1">
- <option value="sample">Sample r</option>
- <option value="lit">Literature r</option>
- <option value="tissue">Tissue r</option>
- </select>
- </td>
- </tr>
- <tr>
- <td><span class="ffl fwb fs12">Database:</span></td>
- <td colspan="3">
- <select name="corr_dataset" size="1">
- {% for tissue in corr_tools.dataset_menu %}
- {% if tissue.tissue %}
- <optgroup label="{{ tissue.tissue }} ------">
- {% endif %}
- {% for dataset in tissue.datasets %}
- <option value="{{ dataset[1] }}"
- {% if corr_tools.dataset_menu_selected == dataset[1] %}
- selected
- {% endif %}>
- {{ dataset[0] }}
- </option>
- {% endfor %}
- {% if tissue.tissue %}
- </optgroup>
- {% endif %}
- {% endfor %}
- </select>
- </td>
- </tr>
-
-
- <tr>
- <td><span class="ffl fwb fs12">Return:</span></td>
-
- <td><select name="corr_return_results" size="1">
- {% for return_result in corr_tools.return_results_menu %}
- <option value="{{ return_result }}"
- {% if corr_tools.return_results_menu_selected == return_result %}
- selected
- {% endif %}>
- Top {{ return_result }}
- </option>
- {% endfor %}
- </select></td>
- </tr>
-
-
- <tr class="mdp1">
- <td><span class="ffl fwb fs12">Samples:</span></td>
- <td>
- <select name="corr_samples_group" size="1">
- {% for group, pretty_group in sample_group_types.items() %}
- <option value="{{ group }}">
- {{ pretty_group }}
- </option>
- {% endfor %}
- </select>
- </td>
- </tr>
-
- </table>
- <br>
- <div id="corr_sample_method_options">
- Pearson <input type="radio" name="corr_sample_method" value="pearson" checked>
- &nbsp;&nbsp;&nbsp;
- Spearman Rank <input type="radio" name="corr_sample_method" value="spearman">
- </div>
- <br>
-
- <input type="submit" name="corr_compute" id="corr_compute" class="btn" value="Compute"><br><br>
-
- <span id="sample_r_desc" class="correlation_desc fs12">
- The <a href="/correlationAnnotation.html#sample_r" target="_blank">Sample Correlation</a>
- is computed
- between trait data and any<br>
- other traits in the sample database selected above. Use
- <a href="/glossary.html#Correlations" target="_blank">Spearman
- Rank</a><br>
- when the sample size is small (&lt;20) or when there are influential outliers.
- </span>
- <SPAN id="lit_r_desc" style="display: none;" class="correlation_desc fs12">
- The <A HREF="/correlationAnnotation.html" TARGET="_blank">Literature Correlation</A>
- (Lit r) between
- this gene and all other genes is computed<BR>
- using the <A HREF="https://grits.eecs.utk.edu/sgo/sgo.html" TARGET="_blank">
- Semantic Gene Organizer</A>
- and human, rat, and mouse data from PubMed. <BR>
- Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<BR><BR>
- <A HREF="/glossary.html#Literature" TARGET="_blank">More on using Lit r</A>
- </SPAN>
- <SPAN id="tissue_r_desc" style="display: none;" class="correlation_desc fs12">
- The <A HREF="/webqtl/main.py?FormID=tissueCorrelation" TARGET="_blank">Tissue Correlation</A>
- (Tissue r)
- estimates the similarity of expression of two genes<BR>
- or transcripts across different cells, tissues, or organs
- (<A HREF="/correlationAnnotation.html#tissue_r" TARGET="_blank">glossary</A>).
- Tissue correlations<BR>
- are generated by analyzing expression in multiple samples usually taken from single cases.<BR>
- <STRONG>Pearson</STRONG> and <STRONG>Spearman Rank</STRONG> correlations have been
- computed for all pairs of genes<BR> using data from mouse samples.<BR>
- </SPAN>
-
- <br>
- </td>
- </tr>
- </table>
- </div>
- </div>
- </td>
- </tr>
- </table>
+ </div>
+</div> \ No newline at end of file
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 7a504c54..8531561a 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -210,7 +210,7 @@ def marker_regression_page():
@app.route("/corr_compute", methods=('POST',))
def corr_compute_page():
- print("In corr_compute, request.args is:", pf(request.form))
+ print("In corr_compute, request.form is:", pf(request.form))
#fd = webqtlFormData.webqtlFormData(request.form)
template_vars = show_corr_results.CorrelationResults(request.form)
return render_template("correlation_page.html", **template_vars.__dict__)