From 466be48f92d4943995c7a3e7bcb9fd1efd775bf6 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 30 May 2013 23:14:50 +0000 Subject: Rewrote some code in get_trait_info in dataset.py Added spearman correlation to show_corr_results and template --- wqflask/base/data_set.py | 123 +++++++++++++---------- wqflask/base/trait.py | 4 +- wqflask/wqflask/correlation/show_corr_results.py | 36 ++++--- wqflask/wqflask/templates/correlation_page.html | 52 +++++++--- 4 files changed, 126 insertions(+), 89 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index c2380f8c..4c5c46a5 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -672,12 +672,13 @@ class MrnaAssayDataSet(DataSet): query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type, self.type, self.type)) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + for item in sample_ids_step: query += """ left join {}Data as T{} on T{}.Id = {}XRef.DataId and T{}.StrainId={}\n """.format(*mescape(self.type, item, item, self.type, item, item)) + query += """ WHERE {}XRef.{}FreezeId = {}Freeze.Id and {}Freeze.Name = '{}' @@ -690,17 +691,19 @@ class MrnaAssayDataSet(DataSet): trait_count = len(trait_sample_data[0]) self.trait_data = collections.defaultdict(list) + # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values - for j in range(trait_count): - trait_name = trait_sample_data[0][j][0] - for i in range(int(number_chunks)): - self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:] - + for trait_counter in range(trait_count): + trait_name = trait_sample_data[0][trait_counter][0] + for chunk_counter in range(int(number_chunks)): + self.trait_data[trait_name] += ( + trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) + def get_trait_info(self, trait_list=None, species=''): - # Note: setting trait_list to [] is probably not a great idea. + # Note: setting trait_list to [] is probably not a great idea. if not trait_list: trait_list = [] @@ -709,9 +712,7 @@ class MrnaAssayDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieveInfo(QTL=1) - if this_trait.symbol: - pass - else: + if not this_trait.symbol: this_trait.symbol = "N/A" #XZ, 12/08/2008: description @@ -719,60 +720,56 @@ class MrnaAssayDataSet(DataSet): description_string = str(this_trait.description).strip() target_string = str(this_trait.probe_target_description).strip() - description_display = '' - if len(description_string) > 1 and description_string != 'None': description_display = description_string else: description_display = this_trait.symbol - if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + if (len(description_display) > 1 and description_display != 'N/A' and + len(target_string) > 1 and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template this_trait.description_display = description_display - #print(" xxxxdd [%s]: %s" % (type(this_trait.description_display), description_display)) #XZ: trait_location_value is used for sorting trait_location_repr = 'N/A' trait_location_value = 1000000 if this_trait.chr and this_trait.mb: - try: - trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - except: - if this_trait.chr.upper() == 'X': - trait_location_value = 20*1000 + this_trait.mb - else: - trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) ) + #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y") + #This is so we can convert the location to a number used for sorting + trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb) + #try: + # trait_location_value = int(this_trait.chr)*1000 + this_trait.mb + #except ValueError: + # if this_trait.chr.upper() == 'X': + # trait_location_value = 20*1000 + this_trait.mb + # else: + # trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 + + # this_trait.mb) + + #ZS: Put this in function currently called "convert_location_to_value" + this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, + float(this_trait.mb)) this_trait.location_value = trait_location_value - #this_trait.trait_location_value = trait_location_value - #XZ, 01/12/08: This SQL query is much faster. + #Get mean expression value query = ( -"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' + """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSet.Name = '%s' """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) print("query is:", pf(query)) result = g.db.execute(query).fetchone() + + mean = result[0] if result else 0 - if result: - if result[0]: - mean = result[0] - else: - mean=0 - else: - mean = 0 - - #XZ, 06/05/2009: It is neccessary to turn on nowrap - this_trait.mean = repr = "%2.3f" % mean + this_trait.mean = "%2.3f" % mean #LRS and its location this_trait.LRS_score_repr = 'N/A' @@ -791,23 +788,39 @@ class MrnaAssayDataSet(DataSet): result = self.cursor.fetchone() if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + #if result[0] and result[1]: + # lrs_chr = result[0] + # lrs_mb = result[1] + lrs_chr, lrs_mb = result + #XZ: LRS_location_value is used for sorting + lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb) + + #try: + # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb) + #except: + # if lrs_chr.upper() == 'X': + # lrs_location_value = 20*1000 + float(lrs_mb) + # else: + # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 + + # float(lrs_mb)) + + this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_score_value = this_trait.lrs + this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) + + + def convert_location_to_value(chromosome, mb): + try: + location_value = int(chromosome)*1000 + float(mb) + except ValueError: + if chromosome.upper() == 'X': + location_value = 20*1000 + float(mb) + else: + location_value = (ord(str(chromosome).upper()[0])*1000 + + float(mb)) + + return location_value - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) - def get_sequence(self): query = """ SELECT diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 7c1c035c..5fde114f 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -15,7 +15,7 @@ from pprint import pformat as pf from flask import Flask, g -class GeneralTrait: +class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, Published phenotype, genotype, or user input trait @@ -78,7 +78,7 @@ class GeneralTrait: #desc = self.handle_pca(desc) stringy = desc return stringy - + def display_name(self): diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index aa20eba1..5d40c835 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -30,7 +30,6 @@ from __future__ import absolute_import, print_function, division import string -from math import * import cPickle import os import time @@ -106,6 +105,7 @@ class CorrelationResults(object): corr_samples_group = start_vars['corr_samples_group'] self.sample_data = {} + self.corr_method = start_vars['corr_sample_method'] #The two if statements below append samples to the sample list based upon whether the user #rselected Primary Samples Only, Other Samples Only, or All Samples @@ -123,27 +123,31 @@ class CorrelationResults(object): #if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) + self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() + self.correlation_data = {} for trait, values in self.target_dataset.trait_data.iteritems(): - trait_values = [] + this_trait_values = [] target_values = [] for index, sample in enumerate(self.target_dataset.samplelist): - target_value = values[index] - if sample in self.sample_data.keys(): - this_value = self.sample_data[sample] - trait_values.append(this_value) - target_values.append(target_value) - (trait_values, target_values) = normalize_values(trait_values, target_values) - correlation = scipy.stats.pearsonr(trait_values, target_values) - #correlation = cal_correlation(trait_values, target_values) - self.correlation_data[trait] = correlation[0] - #print ('correlation result: %s %s' % (trait, correlation)) - - for trait in self.correlation_data: - print("correlation: ", self.correlation_data[trait]) - + if sample in self.sample_data: + sample_value = self.sample_data[sample] + target_sample_value = values[index] + this_trait_values.append(sample_value) + target_values.append(target_sample_value) + + this_trait_values, target_values = normalize_values(this_trait_values, target_values) + if self.corr_method == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values) + self.correlation_data[trait] = [sample_r, sample_p] + self.correlation_data = collections.OrderedDict( + sorted(self.correlation_data.items(), + key=lambda t: -abs(t[1][0]))) + #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index be750a0c..68fe81ed 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -1,21 +1,42 @@ {% extends "base.html" %} -{% block content %} - - - - - - - - {% for trait in correlation_data %} - - - - {% endfor %} - -
Correlation
{{ correlation_data[trait] }}
+{% block css %} + + + + {% endblock %} +{% block content %} +
+
+

Correlation

+
+
+ + + + + + {% if corr_method == 'pearson' %} + + + {% else %} + + + {% endif %} + + + + {% for trait in correlation_data %} + + + + + + {% endfor %} + +
TraitSample rSample p(r)Sample rhoSample p(rho)
{{ trait }}{{ correlation_data[trait][0] }}{{ correlation_data[trait][1] }}
+{% endblock %} {% block js %} @@ -23,7 +44,6 @@ -