diff options
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 161 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 2 | ||||
-rw-r--r-- | wqflask/other_config/wqflask-nginx.conf | 45 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py | 192 | ||||
-rw-r--r-- | wqflask/wqflask/static/new/javascript/show_trait.coffee | 1 | ||||
-rw-r--r-- | wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee | 9 | ||||
-rw-r--r-- | wqflask/wqflask/templates/index_page.html | 1 | ||||
-rw-r--r-- | wqflask/wqflask/templates/show_trait.html | 2 | ||||
-rw-r--r-- | wqflask/wqflask/templates/show_trait_calculate_correlations.html | 245 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 2 |
10 files changed, 431 insertions, 229 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 111597a9..1520b180 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -23,6 +23,8 @@ from __future__ import absolute_import, print_function, division import os import math +import string +import collections import json import itertools @@ -49,23 +51,29 @@ def create_dataset(dataset_name): query = """ SELECT DBType.Name FROM DBList, DBType - WHERE DBList.Name = '%s' and + WHERE DBList.Name = '{}' and DBType.Id = DBList.DBTypeId - """ % (escape(dataset_name)) - print("query is: ", pf(query)) + """.format(escape(dataset_name)) + #print("query is: ", pf(query)) dataset_type = g.db.execute(query).fetchone().Name #dataset_type = cursor.fetchone()[0] - print("[blubber] dataset_type:", pf(dataset_type)) + #print("[blubber] dataset_type:", pf(dataset_type)) dataset_ob = DS_NAME_MAP[dataset_type] #dataset_class = getattr(data_set, dataset_ob) - print("dataset_ob:", dataset_ob) - print("DS_NAME_MAP:", pf(DS_NAME_MAP)) + #print("dataset_ob:", dataset_ob) + #print("DS_NAME_MAP:", pf(DS_NAME_MAP)) dataset_class = globals()[dataset_ob] return dataset_class(dataset_name) +def mescape(*items): + """Multiple escape""" + escaped = [escape(item) for item in items] + print("escaped is:", escaped) + return escaped + class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" @@ -74,15 +82,21 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): + print("length of self.markers:", len(self.markers)) + print("length of p_values:", len(p_values)) + # THIS IS only needed for the case when we are limiting the number of p-values calculated - if len(self.markers) > len(p_values): + if len(self.markers) < len(p_values): self.markers = self.markers[:len(p_values)] for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value + print("p_value is:", marker['p_value']) marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + class HumanMarkers(Markers): @@ -93,9 +107,9 @@ class HumanMarkers(Markers): for line in marker_data_fh: splat = line.strip().split() marker = {} - marker['chr'] = splat[0] + marker['chr'] = int(splat[0]) marker['name'] = splat[1] - marker['Mb'] = str(float(splat[3]) / 1000000) + marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) #print("markers is: ", pf(self.markers)) @@ -116,8 +130,7 @@ class HumanMarkers(Markers): with Bench("deleting markers"): markers = [] for marker in self.markers: - #if not float(marker['Mb']) <= 0 or not float(marker['chr']) == 0: - if float(marker['Mb']) > 0 and marker['chr'] != "0": + if not marker['Mb'] <= 0 and not marker['chr'] == 0: markers.append(marker) self.markers = markers @@ -349,6 +362,19 @@ class PhenotypeDataSet(DataSet): # (Urgently?) Need to write this pass + def get_trait_list(self): + query = """ + select PublishXRef.Id + from PublishXRef, PublishFreeze + where PublishFreeze.InbredSetId=PublishXRef.InbredSetId + and PublishFreeze.Id = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + trait_data = {} + for trait in results: + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + return trait_data + def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: if not this_trait.haveinfo: @@ -359,9 +385,7 @@ class PhenotypeDataSet(DataSet): continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description - this_trait.description_display = description.decode('utf-8') - - + this_trait.description_display = description if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" @@ -419,7 +443,7 @@ class PhenotypeDataSet(DataSet): PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id Order BY Strain.Name - """ % (trait.name, self.id) + """ % (trait, self.id) results = g.db.execute(query).fetchall() return results @@ -459,6 +483,19 @@ class GenotypeDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) + + def get_trait_list(self): + query = """ + select Geno.Name + from Geno, GenoXRef + where GenoXRef.GenoId = Geno.Id + and GenoFreezeId = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + trait_data = {} + for trait in results: + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + return trait_data def get_trait_info(self, trait_list, species=None): for this_trait in trait_list: @@ -497,7 +534,7 @@ class GenotypeDataSet(DataSet): GenoData.StrainId = Strain.Id Order BY Strain.Name - """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait.name, self.name) + """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name) results = g.db.execute(query).fetchall() return results @@ -569,7 +606,95 @@ class MrnaAssayDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) + + def get_trait_list_1(self): + query = """ + select ProbeSet.Name + from ProbeSet, ProbeSetXRef + where ProbeSetXRef.ProbeSetId = ProbeSet.Id + and ProbeSetFreezeId = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + print("After get_trait_list query") + trait_data = {} + for trait in results: + print("Retrieving sample_data for ", trait[0]) + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + print("After retrieve_sample_data") + return trait_data + + def get_trait_data(self): + sample_ids = [] + for sample in self.group.samplelist: + query = """ + SELECT Strain.Id FROM Strain, Species + WHERE Strain.Name = '{}' + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(*mescape(sample, self.group.species)) + this_id = g.db.execute(query).fetchone()[0] + sample_ids.append('%d' % this_id) + print("sample_ids size: ", len(sample_ids)) + + # MySQL limits the number of tables that can be used in a join to 61, + # so we break the sample ids into smaller chunks + chunk_count = 50 + n = len(sample_ids) / chunk_count + if len(sample_ids) % chunk_count: + n += 1 + print("n: ", n) + #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId + #tempTable = None + #if GeneId and db.type == "ProbeSet": + # if method == "3": + # tempTable = self.getTempLiteratureTable(species=species, + # input_species_geneid=GeneId, + # returnNumber=returnNumber) + # + # if method == "4" or method == "5": + # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, + # TissueProbeSetFreezeId=tissueProbeSetFreezeId, + # method=method, + # returnNumber=returnNumber) + trait_sample_data = [] + for step in range(int(n)): + temp = [] + sample_ids_step = sample_ids[step*chunk_count:min(len(sample_ids), (step+1)*chunk_count)] + for item in sample_ids_step: + temp.append('T%s.value' % item) + query = "SELECT {}.Name,".format(escape(self.type)) + data_start_pos = 1 + query += string.join(temp, ', ') + query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type, + self.type, + self.type)) + #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + for item in sample_ids_step: + query += """ + left join {}Data as T{} on T{}.Id = {}XRef.DataId + and T{}.StrainId={}\n + """.format(*mescape(self.type, item, item, self.type, item, item)) + query += """ + WHERE {}XRef.{}FreezeId = {}Freeze.Id + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(self.type, self.type, self.type, self.type, + self.name, self.type, self.type, self.type, self.type)) + print("query: ", query) + results = g.db.execute(query).fetchall() + trait_sample_data.append(results) + + trait_count = len(trait_sample_data[0]) + self.trait_data = collections.defaultdict(list) + # put all of the separate data together into a dictionary where the keys are + # trait names and values are lists of sample values + for j in range(trait_count): + trait_name = trait_sample_data[0][j][0] + for i in range(int(n)): + self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:] + def get_trait_info(self, trait_list=None, species=''): # Note: setting trait_list to [] is probably not a great idea. @@ -693,9 +818,9 @@ class MrnaAssayDataSet(DataSet): ProbeSetFreeze.Name = %s """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() - return results[0] + def retrieve_sample_data(self, trait): query = """ SELECT @@ -712,7 +837,7 @@ class MrnaAssayDataSet(DataSet): ProbeSetData.StrainId = Strain.Id Order BY Strain.Name - """ % (escape(trait.name), escape(self.name)) + """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() return results diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 2af4bc24..7c1c035c 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -208,7 +208,7 @@ class GeneralTrait: # ''' % (self.cellid, self.name, self.dataset.name) # #else: - results = self.dataset.retrieve_sample_data(self) + results = self.dataset.retrieve_sample_data(self.name) # Todo: is this necessary? If not remove self.data.clear() diff --git a/wqflask/other_config/wqflask-nginx.conf b/wqflask/other_config/wqflask-nginx.conf index 50f9d73c..4e6fd0d9 100644 --- a/wqflask/other_config/wqflask-nginx.conf +++ b/wqflask/other_config/wqflask-nginx.conf @@ -2,7 +2,7 @@ server { # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ listen 80; - server_name _; + server_name gn2python.genenetwork.org; access_log /var/log/nginx/access.log; error_log /var/log/nginx/error.log; @@ -37,6 +37,47 @@ server { proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_read_timeout 40m; - } + } +} + +server { + # Modeled after http://flask.pocoo.org/docs/deploying/wsgi-standalone/ + listen 80; + + server_name gn2python_lei.genenetwork.org; + + access_log /var/log/nginx/lei_access.log; + error_log /var/log/nginx/lei_error.log; + + location ^~ /css/ { + root /gene/wqflask/wqflask/static/; + } + + location ^~ /javascript/ { + root /gene/wqflask/wqflask/static/; + } + +# location ^~ /image/ { +# root /gene/wqflask/wqflask/static/; +# } + + location ^~ /images/ { + root /gene/wqflask/wqflask/static/; + } + + ### New - added by Sam + #location ^~ /static/ { + # root /gene/wqflask/wqflask/static/; + #} + location / { + proxy_pass http://127.0.0.1:5001/; + proxy_redirect off; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + + proxy_read_timeout 40m; + } } diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 96298b37..1d0368cc 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -54,6 +54,7 @@ from dbFunction import webqtlDatabaseFunction import utility.webqtlUtil #this is for parallel computing only. from wqflask.correlation import correlationFunction +from pprint import pformat as pf METHOD_SAMPLE_PEARSON = "1" METHOD_SAMPLE_RANK = "2" @@ -92,7 +93,8 @@ class Trait(object): def calculate_correlation(self, values, method): """Calculate the correlation value and p value according to the method specified""" - #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for + #ZS: This takes the list of values of the trait our selected trait is being correlated + #against and removes the values of the samples our trait has no value for #There's probably a better way of dealing with this, but I'll have to ask Christian updated_raw_values = [] updated_values = [] @@ -276,57 +278,48 @@ class CorrelationResults(object): # return templatePage.error(heading = heading, detail = [message], error=error) def __init__(self, start_vars): - #self.dataset = create_dataset(start_vars['dataset_name']) - #self.dataset.group.read_genotype_file() - #self.genotype = self.dataset.group.genotype - # + # get trait list from db (database name) + # calculate correlation with Base vector and targets + #self.this_trait = GeneralTrait(dataset=self.dataset.name, # name=start_vars['trait_id'], # cellid=None) - helper_functions.get_dataset_and_trait(self, start_vars) + print("start_vars: ", pf(start_vars)) + + helper_functions.get_species_dataset_trait(self, start_vars) + self.dataset.group.read_genotype_file() self.samples = [] # Want only ones with values self.vals = [] - self.variances = [] corr_samples_group = start_vars['corr_samples_group'] + + #The two if statements below append samples to the sample list based upon whether the user + #selected Primary Samples Only, Other Samples Only, or All Samples + + #If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, self.dataset.group.samplelist, ()) - #for sample in self.dataset.group.samplelist: - # value = start_vars['value:' + sample] - # variance = start_vars['variance:' + sample] - # if variance.strip().lower() == 'x': - # variance = 0 - # else: - # variance = float(variance) - # if value.strip().lower() != 'x': - # self.samples.append(str(sample)) - # self.vals.append(float(value)) - # self.variances.append(variance) + #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + #exclude the primary samples (because they would have been added in the previous + #if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': primary_samples = (self.dataset.group.parlist + self.dataset.group.f1list + self.dataset.group.samplelist) self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) - #for sample in self.this_trait.data.keys(): - # if sample not in primary_samples: - # value = start_vars['value:' + sample] - # variance = start_vars['variance:' + sample] - # if variance.strip().lower() == 'x': - # variance = 0 - # else: - # variance = float(variance) - # if value.strip().lower() != 'x': - # self.samples.append(str(sample)) - # self.vals.append(float(value)) - # self.variances.append(variance) - - print("self.samples is:", pf(self.samples)) - - #sample_list = get_sample_data(fd) - #print("sample_list is", pf(sample_list)) + + #for i, sample in enumerate(self.samples): + # print("{} : {}".format(sample, self.vals[i])) + + self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset.get_trait_data() + print("trait_list: {}".format(pf(self.target_dataset.trait_data))) + # Lei Yan todo + for trait, values in self.target_dataset.trait_data.iteritems(): + correlation = calCorrelation(values, ) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset @@ -753,6 +746,39 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); else: self.dict['body'] = "" + def get_all_dataset_data(self): + + """ + SELECT ProbeSet.Name, T128.value, T129.value, T130.value, T131.value, T132.value, T134.value, T135.value, T138.value, T139.value, T140.value, T141.value, T142.value, T144 + .value, T145.value, T147.value, T148.value, T149.value, T487.value, T919.value, T920.value, T922.value + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + left join ProbeSetData as T128 on T128.Id = ProbeSetXRef.DataId and T128.StrainId=128 + left join ProbeSetData as T129 on T129.Id = ProbeSetXRef.DataId and T129.StrainId=129 + left join ProbeSetData as T130 on T130.Id = ProbeSetXRef.DataId and T130.StrainId=130 + left join ProbeSetData as T131 on T131.Id = ProbeSetXRef.DataId and T131.StrainId=131 + left join ProbeSetData as T132 on T132.Id = ProbeSetXRef.DataId and T132.StrainId=132 + left join ProbeSetData as T134 on T134.Id = ProbeSetXRef.DataId and T134.StrainId=134 + left join ProbeSetData as T135 on T135.Id = ProbeSetXRef.DataId and T135.StrainId=135 + left join ProbeSetData as T138 on T138.Id = ProbeSetXRef.DataId and T138.StrainId=138 + left join ProbeSetData as T139 on T139.Id = ProbeSetXRef.DataId and T139.StrainId=139 + left join ProbeSetData as T140 on T140.Id = ProbeSetXRef.DataId and T140.StrainId=140 + left join ProbeSetData as T141 on T141.Id = ProbeSetXRef.DataId and T141.StrainId=141 + left join ProbeSetData as T142 on T142.Id = ProbeSetXRef.DataId and T142.StrainId=142 + left join ProbeSetData as T144 on T144.Id = ProbeSetXRef.DataId and T144.StrainId=144 + left join ProbeSetData as T145 on T145.Id = ProbeSetXRef.DataId and T145.StrainId=145 + left join ProbeSetData as T147 on T147.Id = ProbeSetXRef.DataId and T147.StrainId=147 + left join ProbeSetData as T148 on T148.Id = ProbeSetXRef.DataId and T148.StrainId=148 + left join ProbeSetData as T149 on T149.Id = ProbeSetXRef.DataId and T149.StrainId=149 + left join ProbeSetData as T487 on T487.Id = ProbeSetXRef.DataId and T487.StrainId=487 + left join ProbeSetData as T919 on T919.Id = ProbeSetXRef.DataId and T919.StrainId=919 + left join ProbeSetData as T920 on T920.Id = ProbeSetXRef.DataId and T920.StrainId=920 + left join ProbeSetData as T922 on T922.Id = ProbeSetXRef.DataId and T922.StrainId=922 + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and + ProbeSetFreeze.Name = 'HC_M2_0606_P' and + ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id + """ + + def process_samples(self, start_vars, sample_names, excluded_samples): for sample in sample_names: if sample not in excluded_samples: @@ -765,7 +791,7 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); if value.strip().lower() != 'x': self.samples.append(str(sample)) self.vals.append(float(value)) - self.variances.append(variance) + #self.variances.append(variance) def getSortByValue(self, calculationMethod): @@ -942,32 +968,32 @@ Resorting this table <br> query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, ) #XZ, 09/20/2008: extract literature correlation value together with gene expression values. #XZ, 09/20/2008: notice the difference between the code in next block. - elif tempTable: - # we can get a little performance out of selecting our LitCorr here - # but also we need to do this because we are unconcerned with probes that have no geneId associated with them - # as we would not have litCorr data. - - if method == "3": - query = "SELECT %s.Name, %s.value," % (db.type,tempTable) - dataStartPos = 2 - if method == "4" or method == "5": - query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) - dataStartPos = 3 - - query += string.join(temp,', ') - query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) - if method == "3": - query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) - if method == "4" or method == "5": - query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) - for item in StrainIdstep: - query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) - - if method == "3": - query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) - if method == "4" or method == "5": - query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + #elif tempTable: + # # we can get a little performance out of selecting our LitCorr here + # # but also we need to do this because we are unconcerned with probes that have no geneId associated with them + # # as we would not have litCorr data. + # + # if method == "3": + # query = "SELECT %s.Name, %s.value," % (db.type,tempTable) + # dataStartPos = 2 + # if method == "4" or method == "5": + # query = "SELECT %s.Name, %s.Correlation, %s.PValue," % (db.type,tempTable, tempTable) + # dataStartPos = 3 + # + # query += string.join(temp,', ') + # query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type) + # if method == "3": + # query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable) + # if method == "4" or method == "5": + # query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) + # #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + # for item in StrainIdstep: + # query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item) + # + # if method == "3": + # query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) + # if method == "4" or method == "5": + # query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s' and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type) else: query = "SELECT %s.Name," % db.type dataStartPos = 1 @@ -1258,11 +1284,14 @@ Resorting this table <br> return traits, new_vals else: - #_log.info("Using the slow method for correlation") - # - #_log.info("Fetching from database") - traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) - #_log.info("Done fetching from database") + traits = self.fetchAllDatabaseData(species=self.dataset.species, + GeneId=self.gene_id, + GeneSymbol=self.trait.symbol, + strains=self.sample_names, + db=self.db, + method=self.method, + returnNumber=self.returnNumber, + tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) totalTraits = len(traits) #XZ, 09/18/2008: total trait number return traits @@ -1424,7 +1453,6 @@ Resorting this table <br> return trait_list - def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None): symbolCorrDict = {} @@ -2104,3 +2132,31 @@ Resorting this table <br> return tblobj_body, worksheet, corrScript + +def calCorrelation(values_1, values_2): + N = Math.min(len(values_1), len(values_2)) + X = [] + Y = [] + for i in range(N): + if values_1[i]!= None and values_2[i]!= None: + X.append(values_1[i]) + Y.append(values_2[i]) + NN = len(X) + if NN <6: + return (0.0,NN) + sx = reduce(lambda x,y:x+y,X,0.0) + sy = reduce(lambda x,y:x+y,Y,0.0) + x_mean = sx/NN + y_mean = sy/NN + xyd = 0.0 + sxd = 0.0 + syd = 0.0 + for i in range(NN): + xyd += (X[i] - x_mean)*(Y[i] - y_mean) + sxd += (X[i] - x_mean)*(X[i] - x_mean) + syd += (Y[i] - y_mean)*(Y[i] - y_mean) + try: + corr = xyd/(sqrt(sxd)*sqrt(syd)) + except: + corr = 0 + return (corr, NN) diff --git a/wqflask/wqflask/static/new/javascript/show_trait.coffee b/wqflask/wqflask/static/new/javascript/show_trait.coffee index eb87cf04..0f16ac68 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait.coffee @@ -207,7 +207,6 @@ $ -> ##Calculate Correlations Code - on_corr_method_change = -> console.log("in beginning of on_corr_method_change") corr_method = $('select[name=corr_method]').val() diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee index 03f872ca..3c995441 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee @@ -14,12 +14,6 @@ $ -> $("#trait_data_form").attr("action", url); $("#trait_data_form").submit() - #update_time_remaining = (percent_complete) -> - # seconds_remaining = 1 / (percent_complete - root.previous_percent) * (100 - percent_complete) - # minutes_remaining = seconds_remaining / 60 - # $('#time_remaining').text(Math.round(minutes_remaining) + " minutes remaining") - # root.previous_percent = percent_complete - update_time_remaining = (percent_complete) -> now = new Date() period = now.getTime() - root.start_time @@ -33,8 +27,6 @@ $ -> $('#time_remaining').text(Math.round(total_seconds_remaining) + " seconds remaining") else $('#time_remaining').text(minutes_remaining + " minutes remaining") - #else - # $('#time_remaining').text("period too small") get_progress = -> console.log("temp_uuid:", $("#temp_uuid").val()) @@ -92,7 +84,6 @@ $ -> ) console.log("settingInterval") - #root.start_time = new Date().getTime() this.my_timer = setInterval(get_progress, 1000) return false ) diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index 09172705..0cc1c353 100644 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -9,6 +9,7 @@ <div class="container"> <h1>GeneNetwork</h1> <p class="lead">Open source bioinformatics for systems genetics</p> + <p>- Lei Yan</p> </div> </header> diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 56887d5c..799245c3 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -33,7 +33,7 @@ </div> {% include 'show_trait_details.html' %} - {% include 'show_trait_statistics.html' %} + {# {% include 'show_trait_statistics.html' %} #} {% include 'show_trait_calculate_correlations.html' %} {% include 'show_trait_mapping_tools.html' %} {% include 'show_trait_edit_data.html' %} diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 543afadd..12a064c0 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -1,130 +1,119 @@ - <p class="sectionheader" id="title3" style="border-radius: 5px;"> Calculate Correlations</p> +<div> + <h2>Calculate Correlations</h2> + <div class="well form-horizontal"> + + <div class="control-group"> + <label for="corr_method" class="control-label">Method</label> + <div class="controls"> + <select name="corr_method"> + <option value="sample">Sample r</option> + <option value="lit">Literature r</option> + <option value="tissue">Tissue r</option> + </select> + </div> + </div> + + <div class="control-group"> + <label for="corr_dataset" class="control-label">Database</label> + <div class="controls"> + <select name="corr_dataset"> + {% for tissue in corr_tools.dataset_menu %} + {% if tissue.tissue %} + <optgroup label="{{ tissue.tissue }} ------"> + {% endif %} + {% for dataset in tissue.datasets %} + <option value="{{ dataset[1] }}" + {% if corr_tools.dataset_menu_selected == dataset[1] %} + selected + {% endif %}> + {{ dataset[0] }} + </option> + {% endfor %} + {% if tissue.tissue %} + </optgroup> + {% endif %} + {% endfor %} + </select> + </div> + </div> + + <div class="control-group"> + <label for="corr_return_results" class="control-label">Return</label> + <div class="controls"> + <select name="corr_return_results"> + {% for return_result in corr_tools.return_results_menu %} + <option value="{{ return_result }}" + {% if corr_tools.return_results_menu_selected == return_result %} + selected + {% endif %}> + Top {{ return_result }} + </option> + {% endfor %} + </select> + </div> + </div> + + <div class="control-group"> + <label for="corr_samples_group" class="control-label">Samples</label> + <div class="controls"> + <select name="corr_samples_group"> + {% for group, pretty_group in sample_group_types.items() %} + <option value="{{ group }}">{{ pretty_group }}</option> + {% endfor %} + </select> + </div> + </div> + + <div class="control-group"> + <label for="corr_sample_method" class="control-label">Type</label> + <div class="controls"> + <select name="corr_sample_method"> + <option value="pearson">Pearson</option> + <option value="spearman">Spearman Rank</option> + </select> + </div> + </div> + + <div class="control-group"> + <div class="controls"> + <button class="btn btn-inverse submit_special" + data-url="/corr_compute" + title="Compute Correlation"> + <i class="icon-ok-circle icon-white"></i> Compute + </button> + </div> + </div> - <p id="sectionbody3"></p> + <span id="sample_r_desc" class="correlation_desc fs12"> + The <a href="/correlationAnnotation.html#sample_r" target="_blank">Sample Correlation</a> + is computed + between trait data and any<br> + other traits in the sample database selected above. Use + <a href="/glossary.html#Correlations" target="_blank">Spearman + Rank</a><br> + when the sample size is small (<20) or when there are influential outliers. + </span> + <SPAN id="lit_r_desc" style="display: none;" class="correlation_desc fs12"> + The <A HREF="/correlationAnnotation.html" TARGET="_blank">Literature Correlation</A> + (Lit r) between + this gene and all other genes is computed<BR> + using the <A HREF="https://grits.eecs.utk.edu/sgo/sgo.html" TARGET="_blank"> + Semantic Gene Organizer</A> + and human, rat, and mouse data from PubMed. <BR> + Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<BR><BR> + <A HREF="/glossary.html#Literature" TARGET="_blank">More on using Lit r</A> + </SPAN> + <SPAN id="tissue_r_desc" style="display: none;" class="correlation_desc fs12"> + The <A HREF="/webqtl/main.py?FormID=tissueCorrelation" TARGET="_blank">Tissue Correlation</A> + (Tissue r) + estimates the similarity of expression of two genes<BR> + or transcripts across different cells, tissues, or organs + (<A HREF="/correlationAnnotation.html#tissue_r" TARGET="_blank">glossary</A>). + Tissue correlations<BR> + are generated by analyzing expression in multiple samples usually taken from single cases.<BR> + <STRONG>Pearson</STRONG> and <STRONG>Spearman Rank</STRONG> correlations have been + computed for all pairs of genes<BR> using data from mouse samples.<BR> + </SPAN> - <table class="target4" cellpadding="0" cellspacing="0" width="100%"> - <tr> - <td> - <div class="ui-tabs" id="corr_tabs"> - <div id="corrtabs-1"> - <table cellpadding="0" cellspacing="0" width="100%"> - <tr> - <td> - <input type="hidden" name="orderBy" value="2"> - - <table cellpadding="2" cellspacing="0" width="619px"> - <tr> - <td><span class="ff1 fwb fs12">Method:</span></td> - <td colspan="3"> - <select name="corr_method" size="1"> - <option value="sample">Sample r</option> - <option value="lit">Literature r</option> - <option value="tissue">Tissue r</option> - </select> - </td> - </tr> - <tr> - <td><span class="ffl fwb fs12">Database:</span></td> - <td colspan="3"> - <select name="corr_dataset" size="1"> - {% for tissue in corr_tools.dataset_menu %} - {% if tissue.tissue %} - <optgroup label="{{ tissue.tissue }} ------"> - {% endif %} - {% for dataset in tissue.datasets %} - <option value="{{ dataset[1] }}" - {% if corr_tools.dataset_menu_selected == dataset[1] %} - selected - {% endif %}> - {{ dataset[0] }} - </option> - {% endfor %} - {% if tissue.tissue %} - </optgroup> - {% endif %} - {% endfor %} - </select> - </td> - </tr> - - - <tr> - <td><span class="ffl fwb fs12">Return:</span></td> - - <td><select name="corr_return_results" size="1"> - {% for return_result in corr_tools.return_results_menu %} - <option value="{{ return_result }}" - {% if corr_tools.return_results_menu_selected == return_result %} - selected - {% endif %}> - Top {{ return_result }} - </option> - {% endfor %} - </select></td> - </tr> - - - <tr class="mdp1"> - <td><span class="ffl fwb fs12">Samples:</span></td> - <td> - <select name="corr_samples_group" size="1"> - {% for group, pretty_group in sample_group_types.items() %} - <option value="{{ group }}"> - {{ pretty_group }} - </option> - {% endfor %} - </select> - </td> - </tr> - - </table> - <br> - <div id="corr_sample_method_options"> - Pearson <input type="radio" name="corr_sample_method" value="pearson" checked> - - Spearman Rank <input type="radio" name="corr_sample_method" value="spearman"> - </div> - <br> - - <input type="submit" name="corr_compute" id="corr_compute" class="btn" value="Compute"><br><br> - - <span id="sample_r_desc" class="correlation_desc fs12"> - The <a href="/correlationAnnotation.html#sample_r" target="_blank">Sample Correlation</a> - is computed - between trait data and any<br> - other traits in the sample database selected above. Use - <a href="/glossary.html#Correlations" target="_blank">Spearman - Rank</a><br> - when the sample size is small (<20) or when there are influential outliers. - </span> - <SPAN id="lit_r_desc" style="display: none;" class="correlation_desc fs12"> - The <A HREF="/correlationAnnotation.html" TARGET="_blank">Literature Correlation</A> - (Lit r) between - this gene and all other genes is computed<BR> - using the <A HREF="https://grits.eecs.utk.edu/sgo/sgo.html" TARGET="_blank"> - Semantic Gene Organizer</A> - and human, rat, and mouse data from PubMed. <BR> - Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<BR><BR> - <A HREF="/glossary.html#Literature" TARGET="_blank">More on using Lit r</A> - </SPAN> - <SPAN id="tissue_r_desc" style="display: none;" class="correlation_desc fs12"> - The <A HREF="/webqtl/main.py?FormID=tissueCorrelation" TARGET="_blank">Tissue Correlation</A> - (Tissue r) - estimates the similarity of expression of two genes<BR> - or transcripts across different cells, tissues, or organs - (<A HREF="/correlationAnnotation.html#tissue_r" TARGET="_blank">glossary</A>). - Tissue correlations<BR> - are generated by analyzing expression in multiple samples usually taken from single cases.<BR> - <STRONG>Pearson</STRONG> and <STRONG>Spearman Rank</STRONG> correlations have been - computed for all pairs of genes<BR> using data from mouse samples.<BR> - </SPAN> - - <br> - </td> - </tr> - </table> - </div> - </div> - </td> - </tr> - </table> + </div> +</div>
\ No newline at end of file diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 7a504c54..8531561a 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -210,7 +210,7 @@ def marker_regression_page(): @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): - print("In corr_compute, request.args is:", pf(request.form)) + print("In corr_compute, request.form is:", pf(request.form)) #fd = webqtlFormData.webqtlFormData(request.form) template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) |