diff options
Diffstat (limited to 'wqflask/base/trait.py')
-rw-r--r-- | wqflask/base/trait.py | 218 |
1 files changed, 124 insertions, 94 deletions
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 8f8b5b70..24d0476d 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,39 +1,30 @@ -from __future__ import absolute_import, division, print_function - -import os -import string -import resource -import codecs import requests -import random -import urllib +import simplejson as json +from wqflask import app from base import webqtlConfig from base.webqtlCaseData import webqtlCaseData from base.data_set import create_dataset -from db import webqtlDatabaseFunction -from utility import webqtlUtil from utility import hmac from utility.authentication_tools import check_resource_availability -from utility.tools import GN2_BASE_URL, GN_VERSION -from utility.redis_tools import get_redis_conn, get_resource_id, get_resource_info -Redis = get_redis_conn() - -from wqflask import app +from utility.tools import GN2_BASE_URL +from utility.redis_tools import get_redis_conn, get_resource_id -import simplejson as json -from MySQLdb import escape_string as escape -from pprint import pformat as pf +from utility.db_tools import escape -from flask import Flask, g, request, url_for, redirect, make_response, render_template +from flask import g, request, url_for from utility.logger import getLogger -logger = getLogger(__name__ ) + +logger = getLogger(__name__) + +Redis = get_redis_conn() + def create_trait(**kw): - assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + assert bool(kw.get('dataset')) != bool( + kw.get('dataset_name')), "Needs dataset ob. or name" - permitted = True if kw.get('name'): if kw.get('dataset_name'): if kw.get('dataset_name') != "Temp": @@ -43,18 +34,23 @@ def create_trait(**kw): if kw.get('dataset_name') != "Temp": if dataset.type == 'Publish': - permissions = check_resource_availability(dataset, kw.get('name')) + permissions = check_resource_availability( + dataset, kw.get('name')) else: permissions = check_resource_availability(dataset) if "view" in permissions['data']: the_trait = GeneralTrait(**kw) if the_trait.dataset.type != "Temp": - the_trait = retrieve_trait_info(the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info')) + the_trait = retrieve_trait_info( + the_trait, + the_trait.dataset, + get_qtl_info=kw.get('get_qtl_info')) return the_trait else: return None + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -64,12 +60,17 @@ class GeneralTrait(object): def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion - assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; - self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. + assert bool(kw.get('dataset')) != bool( + kw.get('dataset_name')), "Needs dataset ob. or name" + # Trait ID, ProbeSet ID, Published ID, etc. + self.name = kw.get('name') if kw.get('dataset_name'): if kw.get('dataset_name') == "Temp": temp_group = self.name.split("_")[2] - self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + self.dataset = create_dataset( + dataset_name="Temp", + dataset_type="Temp", + group_name=temp_group) else: self.dataset = create_dataset(kw.get('dataset_name')) else: @@ -77,7 +78,8 @@ class GeneralTrait(object): self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) - self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet + # Blat sequence, available for ProbeSet + self.sequence = kw.get('sequence') self.data = kw.get('data', {}) self.view = True @@ -103,9 +105,10 @@ class GeneralTrait(object): elif len(name2) == 3: self.dataset, self.name, self.cellid = name2 - # Todo: These two lines are necessary most of the time, but perhaps not all of the time - # So we could add a simple if statement to short-circuit this if necessary - if get_sample_info != False: + # Todo: These two lines are necessary most of the time, but + # perhaps not all of the time So we could add a simple if + # statement to short-circuit this if necessary + if get_sample_info is not False: self = retrieve_sample_data(self, self.dataset) def export_informative(self, include_variance=0): @@ -118,14 +121,14 @@ class GeneralTrait(object): vals = [] the_vars = [] sample_aliases = [] - for sample_name, sample_data in self.data.items(): - if sample_data.value != None: - if not include_variance or sample_data.variance != None: + for sample_name, sample_data in list(self.data.items()): + if sample_data.value is not None: + if not include_variance or sample_data.variance is not None: samples.append(sample_name) vals.append(sample_data.value) the_vars.append(sample_data.variance) sample_aliases.append(sample_data.name2) - return samples, vals, the_vars, sample_aliases + return samples, vals, the_vars, sample_aliases @property def description_fmt(self): @@ -144,7 +147,8 @@ class GeneralTrait(object): formatted = self.post_publication_description else: formatted = "Not available" - + if isinstance(formatted, bytes): + formatted = formatted.decode("utf-8") return formatted @property @@ -153,8 +157,8 @@ class GeneralTrait(object): alias = 'Not available' if getattr(self, "alias", None): - alias = string.replace(self.alias, ";", " ") - alias = string.join(string.split(alias), ", ") + alias = self.alias.replace(";", " ") + alias = ", ".join(alias.split()) return alias @@ -164,12 +168,17 @@ class GeneralTrait(object): alias = 'Not available' if self.symbol: - human_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) - mouse_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) - other_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) + human_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) + mouse_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) + other_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) if human_response and mouse_response and other_response: - alias_list = json.loads(human_response.content) + json.loads(mouse_response.content) + json.loads(other_response.content) + alias_list = json.loads(human_response.content) + json.loads( + mouse_response.content) + \ + json.loads(other_response.content) filtered_aliases = [] seen = set() @@ -183,33 +192,34 @@ class GeneralTrait(object): return alias - @property def location_fmt(self): '''Return a text formatted location - While we're at it we set self.location in case we need it later (do we?) + While we're at it we set self.location in case we need it + later (do we?) ''' if self.chr and self.mb: - self.location = 'Chr %s @ %s Mb' % (self.chr,self.mb) + self.location = 'Chr %s @ %s Mb' % (self.chr, self.mb) elif self.chr: self.location = 'Chr %s @ Unknown position' % (self.chr) else: self.location = 'Not available' fmt = self.location - ##XZ: deal with direction + # XZ: deal with direction if self.strand_probe == '+': fmt += (' on the plus strand ') elif self.strand_probe == '-': fmt += (' on the minus strand ') return fmt - + + def retrieve_sample_data(trait, dataset, samplelist=None): - if samplelist == None: + if samplelist is None: samplelist = [] if dataset.type == "Temp": @@ -225,16 +235,19 @@ def retrieve_sample_data(trait, dataset, samplelist=None): all_samples_ordered = dataset.group.all_samples_ordered() for i, item in enumerate(results): try: - trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item)) + trait.data[all_samples_ordered[i]] = webqtlCaseData( + all_samples_ordered[i], float(item)) except: pass else: for item in results: name, value, variance, num_cases, name2 = item if not samplelist or (samplelist and name in samplelist): - trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) + # name, value, variance, num_cases) + trait.data[name] = webqtlCaseData(*item) return trait + @app.route("/trait/get_sample_data") def get_sample_data(): params = request.args @@ -250,7 +263,8 @@ def get_sample_data(): trait_dict['group'] = trait_ob.dataset.group.name trait_dict['tissue'] = trait_ob.dataset.tissue trait_dict['species'] = trait_ob.dataset.group.species - trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) + trait_dict['url'] = url_for( + 'show_trait_page', trait_id=trait, dataset=dataset) trait_dict['description'] = trait_ob.description_display if trait_ob.dataset.type == "ProbeSet": trait_dict['symbol'] = trait_ob.symbol @@ -260,22 +274,27 @@ def get_sample_data(): trait_dict['pubmed_link'] = trait_ob.pubmed_link trait_dict['pubmed_text'] = trait_ob.pubmed_text - return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + return json.dumps([trait_dict, {key: value.value for + key, value in list( + trait_ob.data.items())}]) else: return None - + + def jsonable(trait): """Return a dict suitable for using as json Actual turning into json doesn't happen here though""" - dataset = create_dataset(dataset_name = trait.dataset.name, dataset_type = trait.dataset.type, group_name = trait.dataset.group.name) - + dataset = create_dataset(dataset_name=trait.dataset.name, + dataset_type=trait.dataset.type, + group_name=trait.dataset.group.name) + if dataset.type == "ProbeSet": return dict(name=trait.name, symbol=trait.symbol, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, mean=trait.mean, location=trait.location_repr, @@ -287,7 +306,7 @@ def jsonable(trait): if trait.pubmed_id: return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, @@ -300,7 +319,7 @@ def jsonable(trait): else: return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, @@ -312,19 +331,20 @@ def jsonable(trait): elif dataset.type == "Geno": return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, location=trait.location_repr ) else: return dict() + def jsonable_table_row(trait, dataset_name, index): """Return a list suitable for json and intended to be displayed in a table Actual turning into json doesn't happen here though""" dataset = create_dataset(dataset_name) - + if dataset.type == "ProbeSet": if trait.mean == "": mean = "N/A" @@ -336,11 +356,13 @@ def jsonable_table_row(trait, dataset_name, index): additive = "%.3f" % round(float(trait.additive), 2) return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.symbol, trait.description_display, trait.location_repr, - mean, + mean, trait.LRS_score_repr, trait.LRS_location_repr, additive] @@ -352,7 +374,9 @@ def jsonable_table_row(trait, dataset_name, index): if trait.pubmed_id: return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.description_display, trait.authors, '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>', @@ -362,7 +386,9 @@ def jsonable_table_row(trait, dataset_name, index): else: return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.description_display, trait.authors, trait.pubmed_text, @@ -372,7 +398,9 @@ def jsonable_table_row(trait, dataset_name, index): elif dataset.type == "Geno": return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.location_repr] else: return dict() @@ -383,14 +411,16 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): resource_id = get_resource_id(dataset, trait.name) if dataset.type == 'Publish': - the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format( + resource_id, g.user_session.user_id) else: - the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format( + resource_id, g.user_session.user_id, trait.name) try: response = requests.get(the_url).content trait_info = json.loads(response) - except: #ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait + except: # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait if dataset.type == 'Publish': query = """ SELECT @@ -419,8 +449,8 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): logger.sql(query) trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. + # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name + # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif dataset.type == 'ProbeSet': display_fields_string = ', ProbeSet.'.join(dataset.display_fields) display_fields_string = 'ProbeSet.' + display_fields_string @@ -433,14 +463,14 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): ProbeSetFreeze.Name = '%s' AND ProbeSet.Name = '%s' """ % (escape(display_fields_string), - escape(dataset.name), - escape(str(trait.name))) + escape(dataset.name), + escape(str(trait.name))) logger.sql(query) trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name + # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif dataset.type == 'Geno': - display_fields_string = string.join(dataset.display_fields,',Geno.') + display_fields_string = ',Geno.'.join(dataset.display_fields) display_fields_string = 'Geno.' + display_fields_string query = """ SELECT %s @@ -451,21 +481,23 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): GenoFreeze.Name = '%s' AND Geno.Name = '%s' """ % (escape(display_fields_string), - escape(dataset.name), - escape(trait.name)) + escape(dataset.name), + escape(trait.name)) logger.sql(query) trait_info = g.db.execute(query).fetchone() - else: #Temp type + else: # Temp type query = """SELECT %s FROM %s WHERE Name = %s""" logger.sql(query) trait_info = g.db.execute(query, - (string.join(dataset.display_fields,','), - dataset.type, trait.name)).fetchone() + ','.join(dataset.display_fields), + dataset.type, trait.name).fetchone() if trait_info: trait.haveinfo = True for i, field in enumerate(dataset.display_fields): - holder = trait_info[i] + holder = trait_info[i] + if isinstance(holder, bytes): + holder = holder.decode('utf-8') setattr(trait, field, holder) if dataset.type == 'Publish': @@ -478,9 +510,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): description = trait.post_publication_description - #If the dataset is confidential and the user has access to confidential - #phenotype traits, then display the pre-publication description instead - #of the post-publication description + # If the dataset is confidential and the user has access to confidential + # phenotype traits, then display the pre-publication description instead + # of the post-publication description if trait.confidential: trait.abbreviation = trait.pre_publication_abbreviation trait.description_display = trait.pre_publication_description @@ -491,10 +523,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.description_display = "" - trait.abbreviation = unicode(str(trait.abbreviation).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.description_display = unicode(str(trait.description_display).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.authors = unicode(str(trait.authors).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - if not trait.year.isdigit(): trait.pubmed_text = "N/A" else: @@ -504,8 +532,8 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id if dataset.type == 'ProbeSet' and dataset.group: - description_string = unicode(str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = unicode(str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') + description_string = trait.description + target_string = trait.probe_target_description if str(description_string or "") != "" and description_string != 'None': description_display = description_string @@ -522,15 +550,17 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.location_repr = 'N/A' if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) elif dataset.type == "Geno": trait.location_repr = 'N/A' if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) if get_qtl_info: - #LRS and its location + # LRS and its location trait.LRS_score_repr = "N/A" trait.LRS_location_repr = "N/A" trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" @@ -605,6 +635,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: - raise KeyError, `trait.name`+' information is not found in the database.' - + raise KeyError(repr(trait.name) + + ' information is not found in the database.') return trait |