From b4371ef0d96605187b7474e7e4844dbebab67d8b Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Wed, 8 May 2013 22:16:26 +0000 Subject: Worked on correlation page Wrote function in dataset.py that gets all the traits in a dataset and their sample values --- misc/notes.txt | 5 +- web/webqtl/correlation/CorrelationPage.py | 2 +- wqflask/base/data_set.py | 161 ++++++++++++-- wqflask/base/trait.py | 2 +- wqflask/other_config/wqflask-nginx.conf | 45 +++- wqflask/wqflask/correlation/show_corr_results.py | 192 ++++++++++------ .../static/new/javascript/show_trait.coffee | 1 - .../new/javascript/show_trait_mapping_tools.coffee | 9 - wqflask/wqflask/templates/index_page.html | 1 + wqflask/wqflask/templates/show_trait.html | 2 +- .../show_trait_calculate_correlations.html | 245 ++++++++++----------- wqflask/wqflask/views.py | 2 +- 12 files changed, 434 insertions(+), 233 deletions(-) diff --git a/misc/notes.txt b/misc/notes.txt index 6bdcccf3..4e478345 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -82,9 +82,8 @@ export TERM=screen To search for commands in history if necessary: history | grep "(whatever is being searched for)" -Run web server: -/usr/local/nginx/sbin/nginx -/usr/sbin/nginx +Reload web server: +/usr/sbin/nginx -s reload Run server: python runserver.py diff --git a/web/webqtl/correlation/CorrelationPage.py b/web/webqtl/correlation/CorrelationPage.py index 72e53f1d..8c74ae0c 100755 --- a/web/webqtl/correlation/CorrelationPage.py +++ b/web/webqtl/correlation/CorrelationPage.py @@ -965,7 +965,7 @@ Resorting this table
if tempTable: self.cursor.execute( 'DROP TEMPORARY TABLE %s' % tempTable ) - return traits + return traits diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 111597a9..1520b180 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -23,6 +23,8 @@ from __future__ import absolute_import, print_function, division import os import math +import string +import collections import json import itertools @@ -49,23 +51,29 @@ def create_dataset(dataset_name): query = """ SELECT DBType.Name FROM DBList, DBType - WHERE DBList.Name = '%s' and + WHERE DBList.Name = '{}' and DBType.Id = DBList.DBTypeId - """ % (escape(dataset_name)) - print("query is: ", pf(query)) + """.format(escape(dataset_name)) + #print("query is: ", pf(query)) dataset_type = g.db.execute(query).fetchone().Name #dataset_type = cursor.fetchone()[0] - print("[blubber] dataset_type:", pf(dataset_type)) + #print("[blubber] dataset_type:", pf(dataset_type)) dataset_ob = DS_NAME_MAP[dataset_type] #dataset_class = getattr(data_set, dataset_ob) - print("dataset_ob:", dataset_ob) - print("DS_NAME_MAP:", pf(DS_NAME_MAP)) + #print("dataset_ob:", dataset_ob) + #print("DS_NAME_MAP:", pf(DS_NAME_MAP)) dataset_class = globals()[dataset_ob] return dataset_class(dataset_name) +def mescape(*items): + """Multiple escape""" + escaped = [escape(item) for item in items] + print("escaped is:", escaped) + return escaped + class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" @@ -74,15 +82,21 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): + print("length of self.markers:", len(self.markers)) + print("length of p_values:", len(p_values)) + # THIS IS only needed for the case when we are limiting the number of p-values calculated - if len(self.markers) > len(p_values): + if len(self.markers) < len(p_values): self.markers = self.markers[:len(p_values)] for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value + print("p_value is:", marker['p_value']) marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + class HumanMarkers(Markers): @@ -93,9 +107,9 @@ class HumanMarkers(Markers): for line in marker_data_fh: splat = line.strip().split() marker = {} - marker['chr'] = splat[0] + marker['chr'] = int(splat[0]) marker['name'] = splat[1] - marker['Mb'] = str(float(splat[3]) / 1000000) + marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) #print("markers is: ", pf(self.markers)) @@ -116,8 +130,7 @@ class HumanMarkers(Markers): with Bench("deleting markers"): markers = [] for marker in self.markers: - #if not float(marker['Mb']) <= 0 or not float(marker['chr']) == 0: - if float(marker['Mb']) > 0 and marker['chr'] != "0": + if not marker['Mb'] <= 0 and not marker['chr'] == 0: markers.append(marker) self.markers = markers @@ -349,6 +362,19 @@ class PhenotypeDataSet(DataSet): # (Urgently?) Need to write this pass + def get_trait_list(self): + query = """ + select PublishXRef.Id + from PublishXRef, PublishFreeze + where PublishFreeze.InbredSetId=PublishXRef.InbredSetId + and PublishFreeze.Id = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + trait_data = {} + for trait in results: + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + return trait_data + def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: if not this_trait.haveinfo: @@ -359,9 +385,7 @@ class PhenotypeDataSet(DataSet): continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description - this_trait.description_display = description.decode('utf-8') - - + this_trait.description_display = description if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" @@ -419,7 +443,7 @@ class PhenotypeDataSet(DataSet): PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id Order BY Strain.Name - """ % (trait.name, self.id) + """ % (trait, self.id) results = g.db.execute(query).fetchall() return results @@ -459,6 +483,19 @@ class GenotypeDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) + + def get_trait_list(self): + query = """ + select Geno.Name + from Geno, GenoXRef + where GenoXRef.GenoId = Geno.Id + and GenoFreezeId = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + trait_data = {} + for trait in results: + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + return trait_data def get_trait_info(self, trait_list, species=None): for this_trait in trait_list: @@ -497,7 +534,7 @@ class GenotypeDataSet(DataSet): GenoData.StrainId = Strain.Id Order BY Strain.Name - """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait.name, self.name) + """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name) results = g.db.execute(query).fetchall() return results @@ -569,7 +606,95 @@ class MrnaAssayDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) + + def get_trait_list_1(self): + query = """ + select ProbeSet.Name + from ProbeSet, ProbeSetXRef + where ProbeSetXRef.ProbeSetId = ProbeSet.Id + and ProbeSetFreezeId = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + print("After get_trait_list query") + trait_data = {} + for trait in results: + print("Retrieving sample_data for ", trait[0]) + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + print("After retrieve_sample_data") + return trait_data + + def get_trait_data(self): + sample_ids = [] + for sample in self.group.samplelist: + query = """ + SELECT Strain.Id FROM Strain, Species + WHERE Strain.Name = '{}' + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(*mescape(sample, self.group.species)) + this_id = g.db.execute(query).fetchone()[0] + sample_ids.append('%d' % this_id) + print("sample_ids size: ", len(sample_ids)) + + # MySQL limits the number of tables that can be used in a join to 61, + # so we break the sample ids into smaller chunks + chunk_count = 50 + n = len(sample_ids) / chunk_count + if len(sample_ids) % chunk_count: + n += 1 + print("n: ", n) + #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId + #tempTable = None + #if GeneId and db.type == "ProbeSet": + # if method == "3": + # tempTable = self.getTempLiteratureTable(species=species, + # input_species_geneid=GeneId, + # returnNumber=returnNumber) + # + # if method == "4" or method == "5": + # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, + # TissueProbeSetFreezeId=tissueProbeSetFreezeId, + # method=method, + # returnNumber=returnNumber) + trait_sample_data = [] + for step in range(int(n)): + temp = [] + sample_ids_step = sample_ids[step*chunk_count:min(len(sample_ids), (step+1)*chunk_count)] + for item in sample_ids_step: + temp.append('T%s.value' % item) + query = "SELECT {}.Name,".format(escape(self.type)) + data_start_pos = 1 + query += string.join(temp, ', ') + query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type, + self.type, + self.type)) + #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + for item in sample_ids_step: + query += """ + left join {}Data as T{} on T{}.Id = {}XRef.DataId + and T{}.StrainId={}\n + """.format(*mescape(self.type, item, item, self.type, item, item)) + query += """ + WHERE {}XRef.{}FreezeId = {}Freeze.Id + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(self.type, self.type, self.type, self.type, + self.name, self.type, self.type, self.type, self.type)) + print("query: ", query) + results = g.db.execute(query).fetchall() + trait_sample_data.append(results) + + trait_count = len(trait_sample_data[0]) + self.trait_data = collections.defaultdict(list) + # put all of the separate data together into a dictionary where the keys are + # trait names and values are lists of sample values + for j in range(trait_count): + trait_name = trait_sample_data[0][j][0] + for i in range(int(n)): + self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:] + def get_trait_info(self, trait_list=None, species=''): # Note: setting trait_list to [] is probably not a great idea. @@ -693,9 +818,9 @@ class MrnaAssayDataSet(DataSet): ProbeSetFreeze.Name = %s """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() - return results[0] + def retrieve_sample_data(self, trait): query = """ SELECT @@ -712,7 +837,7 @@ class MrnaAssayDataSet(DataSet): ProbeSetData.StrainId = Strain.Id Order BY Strain.Name - """ % (escape(trait.name), escape(self.name)) + """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() return results diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 2af4bc24..7c1c035c 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -208,7 +208,7 @@ class GeneralTrait: # ''' % (self.cellid, self.name, self.dataset.name) # #else: - results = self.dataset.retrieve_sample_data(self) + results = self.dataset.retrieve_sample_data(self.name) # Todo: is this necessary? If not remove self.data.clear() diff --git a/wqflask/other_config/wqflask-nginx.conf b/wqflask/other_config/wqflask-nginx.conf index 50f9d73c..4e6fd0d9 100644 --- a/wqflask/other_config/wqflask-nginx.conf +++ b/wqflask/other_config/wqflask-nginx.conf @@ -2,7 +2,7 @@ server { # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ listen 80; - server_name _; + server_name gn2python.genenetwork.org; access_log /var/log/nginx/access.log; error_log /var/log/nginx/error.log; @@ -37,6 +37,47 @@ server { proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_read_timeout 40m; - } + } +} + +server { + # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ + listen 80; + + server_name gn2python_lei.genenetwork.org; + + access_log /var/log/nginx/lei_access.log; + error_log /var/log/nginx/lei_error.log; + + location ^~ /css/ { + root /gene/wqflask/wqflask/static/; + } + + location ^~ /javascript/ { + root /gene/wqflask/wqflask/static/; + } + +# location ^~ /image/ { +# root /gene/wqflask/wqflask/static/; +# } + + location ^~ /images/ { + root /gene/wqflask/wqflask/static/; + } + + ### New - added by Sam + #location ^~ /static/ { + # root /gene/wqflask/wqflask/static/; + #} + location / { + proxy_pass http://127.0.0.1:5001/; + proxy_redirect off; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + + proxy_read_timeout 40m; + } } diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 96298b37..1d0368cc 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -54,6 +54,7 @@ from dbFunction import webqtlDatabaseFunction import utility.webqtlUtil #this is for parallel computing only. from wqflask.correlation import correlationFunction +from pprint import pformat as pf METHOD_SAMPLE_PEARSON = "1" METHOD_SAMPLE_RANK = "2" @@ -92,7 +93,8 @@ class Trait(object): def calculate_correlation(self, values, method): """Calculate the correlation value and p value according to the method specified""" - #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for + #ZS: This takes the list of values of the trait our selected trait is being correlated + #against and removes the values of the samples our trait has no value for #There's probably a better way of dealing with this, but I'll have to ask Christian updated_raw_values = [] updated_values = [] @@ -276,57 +278,48 @@ class CorrelationResults(object): # return templatePage.error(heading = heading, detail = [message], error=error) def __init__(self, start_vars): - #self.dataset = create_dataset(start_vars['dataset_name']) - #self.dataset.group.read_genotype_file() - #self.genotype = self.dataset.group.genotype - # + # get trait list from db (database name) + # calculate correlation with Base vector and targets + #self.this_trait = GeneralTrait(dataset=self.dataset.name, # name=start_vars['trait_id'], # cellid=None) - helper_functions.get_dataset_and_trait(self, start_vars) + print("start_vars: ", pf(start_vars)) + + helper_functions.get_species_dataset_trait(self, start_vars) + self.dataset.group.read_genotype_file() self.samples = [] # Want only ones with values self.vals = [] - self.variances = [] corr_samples_group = start_vars['corr_samples_group'] + + #The two if statements below append samples to the sample list based upon whether the user + #selected Primary Samples Only, Other Samples Only, or All Samples + + #If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, self.dataset.group.samplelist, ()) - #for sample in self.dataset.group.samplelist: - # value = start_vars['value:' + sample] - # variance = start_vars['variance:' + sample] - # if variance.strip().lower() == 'x': - # variance = 0 - # else: - # variance = float(variance) - # if value.strip().lower() != 'x': - # self.samples.append(str(sample)) - # self.vals.append(float(value)) - # self.variances.append(variance) + #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + #exclude the primary samples (because they would have been added in the previous + #if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': primary_samples = (self.dataset.group.parlist + self.dataset.group.f1list + self.dataset.group.samplelist) self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) - #for sample in self.this_trait.data.keys(): - # if sample not in primary_samples: - # value = start_vars['value:' + sample] - # variance = start_vars['variance:' + sample] - # if variance.strip().lower() == 'x': - # variance = 0 - # else: - # variance = float(variance) - # if value.strip().lower() != 'x': - # self.samples.append(str(sample)) - # self.vals.append(float(value)) - # self.variances.append(variance) - - print("self.samples is:", pf(self.samples)) - - #sample_list = get_sample_data(fd) - #print("sample_list is", pf(sample_list)) + + #for i, sample in enumerate(self.samples): + # print("{} : {}".format(sample, self.vals[i])) + + self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset.get_trait_data() + print("trait_list: {}".format(pf(self.target_dataset.trait_data))) + # Lei Yan todo + for trait, values in self.target_dataset.trait_data.iteritems(): + correlation = calCorrelation(values, ) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset @@ -753,6 +746,39 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); else: self.dict['body'] = "" + def get_all_dataset_data(self): + + """ + SELECT ProbeSet.Name, T128.value, T129.value, T130.value, T131.value, T132.value, T134.value, T135.value, T138.value, T139.value, T140.value, T141.value, T142.value, T144 + .value, T145.value, T147.value, T148.value, T149.value, T487.value, T919.value, T920.value, T922.value + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + left join ProbeSetData as T128 on T128.Id = ProbeSetXRef.DataId and T128.StrainId=128 + left join ProbeSetData as T129 on T129.Id = ProbeSetXRef.DataId and T129.StrainId=129 + left join ProbeSetData as T130 on T130.Id = ProbeSetXRef.DataId and T130.StrainId=130 + left join ProbeSetData as T131 on T131.Id = ProbeSetXRef.DataId and T131.StrainId=131 + left join ProbeSetData as T132 on T132.Id = ProbeSetXRef.DataId and T132.StrainId=132 + left join ProbeSetData as T134 on T134.Id = ProbeSetXRef.DataId and T134.StrainId=134 + left join ProbeSetData as T135 on T135.Id = ProbeSetXRef.DataId and T135.StrainId=135 + left join ProbeSetData as T138 on T138.Id = ProbeSetXRef.DataId and T138.StrainId=138 + left join ProbeSetData as T139 on T139.Id = ProbeSetXRef.DataId and T139.StrainId=139 + left join ProbeSetData as T140 on T140.Id = ProbeSetXRef.DataId and T140.StrainId=140 + left join ProbeSetData as T141 on T141.Id = ProbeSetXRef.DataId and T141.StrainId=141 + left join ProbeSetData as T142 on T142.Id = ProbeSetXRef.DataId and T142.StrainId=142 + left join ProbeSetData as T144 on T144.Id = ProbeSetXRef.DataId and T144.StrainId=144 + left join ProbeSetData as T145 on T145.Id = ProbeSetXRef.DataId and T145.StrainId=145 + left join ProbeSetData as T147 on T147.Id = ProbeSetXRef.DataId and T147.StrainId=147 + left join ProbeSetData as T148 on T148.Id = ProbeSetXRef.DataId and T148.StrainId=148 + left join ProbeSetData as T149 on T149.Id = ProbeSetXRef.DataId and T149.StrainId=149 + left join ProbeSetData as T487 on T487.Id = ProbeSetXRef.DataId and T487.StrainId=487 + left join ProbeSetData as T919 on T919.Id = ProbeSetXRef.DataId and T919.StrainId=919 + left join ProbeSetData as T920 on T920.Id = ProbeSetXRef.DataId and T920.StrainId=920 + left join ProbeSetData as T922 on T922.Id = ProbeSetXRef.DataId and T922.StrainId=922 + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and + ProbeSetFreeze.Name = 'HC_M2_0606_P' and + ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id + """ + + def process_samples(self, start_vars, sample_names, excluded_samples): for sample in sample_names: if sample not in excluded_samples: @@ -765,7 +791,7 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); if value.strip().lower() != 'x': self.samples.append(str(sample)) self.vals.append(float(value)) - self.variances.append(variance) + #self.variances.append(variance) def getSortByValue(self, calculationMethod): @@ -942,32 +968,32 @@ Resorting this table
query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, ) #XZ, 09/20/2008: extract literature correlation value together with gene expression values. #XZ, 09/20/2008: notice the difference between the code in next block. - elif tempTable: - # we can get a little performance out of selecting our LitCorr here - # but also we need to do this because we are unconcerned with probes that have no geneId associated with them - # as we would not have litCorr data. - - if method == "3": - query = "SELECT %s.Name, %s.value," % (db.type,tempTable) - dataStartPos = 2 - if method == "4" or method == "5": - query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) - dataStartPos = 3 - - query += string.join(temp,', ') - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - if method == "3": - query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) - if method == "4" or method == "5": - query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) - for item in StrainIdstep: - query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) - - if method == "3": - query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - if method == "4" or method == "5": - query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + #elif tempTable: + # # we can get a little performance out of selecting our LitCorr here + # # but also we need to do this because we are unconcerned with probes that have no geneId associated with them + # # as we would not have litCorr data. + # + # if method == "3": + # query = "SELECT %s.Name, %s.value," % (db.type,tempTable) + # dataStartPos = 2 + # if method == "4" or method == "5": + # query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) + # dataStartPos = 3 + # + # query += string.join(temp,', ') + # query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) + # if method == "3": + # query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) + # if method == "4" or method == "5": + # query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) + # #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + # for item in StrainIdstep: + # query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) + # + # if method == "3": + # query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + # if method == "4" or method == "5": + # query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) else: query = "SELECT %s.Name," % db.type dataStartPos = 1 @@ -1258,11 +1284,14 @@ Resorting this table
return traits, new_vals else: - #_log.info("Using the slow method for correlation") - # - #_log.info("Fetching from database") - traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) - #_log.info("Done fetching from database") + traits = self.fetchAllDatabaseData(species=self.dataset.species, + GeneId=self.gene_id, + GeneSymbol=self.trait.symbol, + strains=self.sample_names, + db=self.db, + method=self.method, + returnNumber=self.returnNumber, + tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) totalTraits = len(traits) #XZ, 09/18/2008: total trait number return traits @@ -1424,7 +1453,6 @@ Resorting this table
return trait_list - def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None): symbolCorrDict = {} @@ -2104,3 +2132,31 @@ Resorting this table
return tblobj_body, worksheet, corrScript + +def calCorrelation(values_1, values_2): + N = Math.min(len(values_1), len(values_2)) + X = [] + Y = [] + for i in range(N): + if values_1[i]!= None and values_2[i]!= None: + X.append(values_1[i]) + Y.append(values_2[i]) + NN = len(X) + if NN <6: + return (0.0,NN) + sx = reduce(lambda x,y:x+y,X,0.0) + sy = reduce(lambda x,y:x+y,Y,0.0) + x_mean = sx/NN + y_mean = sy/NN + xyd = 0.0 + sxd = 0.0 + syd = 0.0 + for i in range(NN): + xyd += (X[i] - x_mean)*(Y[i] - y_mean) + sxd += (X[i] - x_mean)*(X[i] - x_mean) + syd += (Y[i] - y_mean)*(Y[i] - y_mean) + try: + corr = xyd/(sqrt(sxd)*sqrt(syd)) + except: + corr = 0 + return (corr, NN) diff --git a/wqflask/wqflask/static/new/javascript/show_trait.coffee b/wqflask/wqflask/static/new/javascript/show_trait.coffee index eb87cf04..0f16ac68 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait.coffee @@ -207,7 +207,6 @@ $ -> ##Calculate Correlations Code - on_corr_method_change = -> console.log("in beginning of on_corr_method_change") corr_method = $('select[name=corr_method]').val() diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee index 03f872ca..3c995441 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee @@ -14,12 +14,6 @@ $ -> $("#trait_data_form").attr("action", url); $("#trait_data_form").submit() - #update_time_remaining = (percent_complete) -> - # seconds_remaining = 1 / (percent_complete - root.previous_percent) * (100 - percent_complete) - # minutes_remaining = seconds_remaining / 60 - # $('#time_remaining').text(Math.round(minutes_remaining) + " minutes remaining") - # root.previous_percent = percent_complete - update_time_remaining = (percent_complete) -> now = new Date() period = now.getTime() - root.start_time @@ -33,8 +27,6 @@ $ -> $('#time_remaining').text(Math.round(total_seconds_remaining) + " seconds remaining") else $('#time_remaining').text(minutes_remaining + " minutes remaining") - #else - # $('#time_remaining').text("period too small") get_progress = -> console.log("temp_uuid:", $("#temp_uuid").val()) @@ -92,7 +84,6 @@ $ -> ) console.log("settingInterval") - #root.start_time = new Date().getTime() this.my_timer = setInterval(get_progress, 1000) return false ) diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index 09172705..0cc1c353 100644 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -9,6 +9,7 @@

GeneNetwork

Open source bioinformatics for systems genetics

+

- Lei Yan

diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 56887d5c..799245c3 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -33,7 +33,7 @@ {% include 'show_trait_details.html' %} - {% include 'show_trait_statistics.html' %} + {# {% include 'show_trait_statistics.html' %} #} {% include 'show_trait_calculate_correlations.html' %} {% include 'show_trait_mapping_tools.html' %} {% include 'show_trait_edit_data.html' %} diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 543afadd..12a064c0 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -1,130 +1,119 @@ -

  Calculate Correlations

+
+

Calculate Correlations

+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+
+ +
+
-

+ + The Sample Correlation + is computed + between trait data and any
+ other traits in the sample database selected above. Use + Spearman + Rank
+ when the sample size is small (<20) or when there are influential outliers. +
+ + - - - - -
-
-
- - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
Method: - -
Database: - -
Return:
Samples: - -
-
-
- Pearson -     - Spearman Rank -
-
- -

- - - The Sample Correlation - is computed - between trait data and any
- other traits in the sample database selected above. Use - Spearman - Rank
- when the sample size is small (<20) or when there are influential outliers. -
- - - -
-
-
-
-
+
+
\ No newline at end of file diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 7a504c54..8531561a 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -210,7 +210,7 @@ def marker_regression_page(): @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): - print("In corr_compute, request.args is:", pf(request.form)) + print("In corr_compute, request.form is:", pf(request.form)) #fd = webqtlFormData.webqtlFormData(request.form) template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) -- cgit v1.2.3