aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base/trait.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base/trait.py')
-rw-r--r--[-rwxr-xr-x]wqflask/base/trait.py243
1 files changed, 155 insertions, 88 deletions
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index ff80795c..a71d8157 100755..100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, division, print_function
import string
import resource
-
+import codecs
from htmlgen import HTMLgen2 as HT
@@ -31,16 +31,16 @@ class GeneralTrait(object):
"""
- def __init__(self, get_qtl_info=False, **kw):
+ def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
# xor assertion
assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
if kw.get('dataset_name'):
self.dataset = create_dataset(kw.get('dataset_name'))
- print(" in GeneralTrait created dataset:", self.dataset)
+ #print(" in GeneralTrait created dataset:", self.dataset)
else:
self.dataset = kw.get('dataset')
self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
- print("THE NAME IS:", self.name)
+ #print("THE NAME IS:", self.name)
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
@@ -67,7 +67,8 @@ class GeneralTrait(object):
# Todo: These two lines are necessary most of the time, but perhaps not all of the time
# So we could add a simple if statement to short-circuit this if necessary
self.retrieve_info(get_qtl_info=get_qtl_info)
- self.retrieve_sample_data()
+ if get_sample_info != False:
+ self.retrieve_sample_data()
def jsonable(self):
@@ -179,13 +180,15 @@ class GeneralTrait(object):
samples = []
vals = []
the_vars = []
+ sample_aliases = []
for sample_name, sample_data in self.data.items():
if sample_data.value != None:
if not include_variance or sample_data.variance != None:
samples.append(sample_name)
vals.append(sample_data.value)
the_vars.append(sample_data.variance)
- return samples, vals, the_vars
+ sample_aliases.append(sample_data.name2)
+ return samples, vals, the_vars, sample_aliases
#
@@ -220,32 +223,6 @@ class GeneralTrait(object):
if samplelist == None:
samplelist = []
- #assert self.dataset
-
- #if self.cellid:
- # #Probe Data
- # query = '''
- # SELECT
- # Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id
- # FROM
- # (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef,
- # Strain, Probe, ProbeSet)
- # left join ProbeSE on
- # (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
- # WHERE
- # Probe.Name = '%s' AND ProbeSet.Name = '%s' AND
- # Probe.ProbeSetId = ProbeSet.Id AND
- # ProbeXRef.ProbeId = Probe.Id AND
- # ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND
- # ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
- # ProbeSetFreeze.Name = '%s' AND
- # ProbeXRef.DataId = ProbeData.Id AND
- # ProbeData.StrainId = Strain.Id
- # Order BY
- # Strain.Name
- # ''' % (self.cellid, self.name, self.dataset.name)
- #
- #else:
results = self.dataset.retrieve_sample_data(self.name)
# Todo: is this necessary? If not remove
@@ -255,19 +232,10 @@ class GeneralTrait(object):
if results:
for item in results:
- name, value, variance, num_cases = item
+ name, value, variance, num_cases, name2 = item
if not samplelist or (samplelist and name in samplelist):
self.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
- #def keys(self):
- # return self.__dict__.keys()
- #
- #def has_key(self, key):
- # return self.__dict__.has_key(key)
- #
- #def items(self):
- # return self.__dict__.items()
-
def retrieve_info(self, get_qtl_info=False):
assert self.dataset, "Dataset doesn't exist"
if self.dataset.type == 'Publish':
@@ -290,10 +258,10 @@ class GeneralTrait(object):
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishFreeze.Id = %s
""" % (self.name, self.dataset.id)
-
- print("query is:", query)
trait_info = g.db.execute(query).fetchone()
+
+
#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
#XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
elif self.dataset.type == 'ProbeSet':
@@ -328,7 +296,6 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
trait_info = g.db.execute(query).fetchone()
- #print("trait_info is: ", pf(trait_info))
else: #Temp type
query = """SELECT %s FROM %s WHERE Name = %s"""
trait_info = g.db.execute(query,
@@ -339,54 +306,118 @@ class GeneralTrait(object):
#XZ: assign SQL query result to trait attributes.
for i, field in enumerate(self.dataset.display_fields):
- #print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i]))
holder = trait_info[i]
if isinstance(trait_info[i], basestring):
holder = unicode(trait_info[i], "utf8", "ignore")
setattr(self, field, holder)
-
+
if self.dataset.type == 'Publish':
self.confidential = 0
if self.pre_publication_description and not self.pubmed_id:
self.confidential = 1
+
+ description = self.post_publication_description
+
+ #If the dataset is confidential and the user has access to confidential
+ #phenotype traits, then display the pre-publication description instead
+ #of the post-publication description
+ if self.confidential:
+ self.description_display = ""
+
+ #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
+ # privilege=self.dataset.privilege,
+ # userName=self.dataset.userName,
+ # authorized_users=self.authorized_users):
+ #
+ # description = self.pre_publication_description
+
+ if description:
+ self.description_display = description.strip()
+ else:
+ self.description_display = ""
- self.homologeneid = None
-
- #print("self.geneid is:", self.geneid)
- #print(" type:", type(self.geneid))
- #print("self.dataset.group.name is:", self.dataset.group.name)
- if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
- #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
- #XZ: So I have to test if geneid is number before execute the query.
- #XZ: The geneid values in database should be cleaned up.
- #try:
- # float(self.geneid)
- # geneidIsNumber = True
- #except ValueError:
- # geneidIsNumber = False
-
- #if geneidIsNumber:
-
-
- query = """
- SELECT
- HomologeneId
- FROM
- Homologene, Species, InbredSet
- WHERE
- Homologene.GeneId =%s AND
- InbredSet.Name = '%s' AND
- InbredSet.SpeciesId = Species.Id AND
- Species.TaxonomyId = Homologene.TaxonomyId
- """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
- result = g.db.execute(query).fetchone()
- #else:
- # result = None
+ if not self.year.isdigit():
+ self.pubmed_text = "N/A"
+ else:
+ self.pubmed_text = self.year
- if result:
- self.homologeneid = result[0]
+ if self.pubmed_id:
+ self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id
+
+
+ self.homologeneid = None
+ if self.dataset.type == 'ProbeSet' and self.dataset.group:
+ if self.geneid:
+ #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
+ #XZ: So I have to test if geneid is number before execute the query.
+ #XZ: The geneid values in database should be cleaned up.
+ #try:
+ # float(self.geneid)
+ # geneidIsNumber = True
+ #except ValueError:
+ # geneidIsNumber = False
+ #if geneidIsNumber:
+ query = """
+ SELECT
+ HomologeneId
+ FROM
+ Homologene, Species, InbredSet
+ WHERE
+ Homologene.GeneId =%s AND
+ InbredSet.Name = '%s' AND
+ InbredSet.SpeciesId = Species.Id AND
+ Species.TaxonomyId = Homologene.TaxonomyId
+ """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
+ result = g.db.execute(query).fetchone()
+ #else:
+ # result = None
+
+ if result:
+ self.homologeneid = result[0]
+
+ description_string = unicode(str(self.description).strip(codecs.BOM_UTF8), 'utf-8')
+ target_string = unicode(str(self.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
+
+ if len(description_string) > 1 and description_string != 'None':
+ description_display = description_string
+ else:
+ description_display = self.symbol
+
+ if (len(description_display) > 1 and description_display != 'N/A' and
+ len(target_string) > 1 and target_string != 'None'):
+ description_display = description_display + '; ' + target_string.strip()
+
+ # Save it for the jinja2 template
+ self.description_display = description_display
+
+ #XZ: trait_location_value is used for sorting
+ trait_location_repr = 'N/A'
+ trait_location_value = 1000000
+
+ if self.chr and self.mb:
+ #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+ #This is so we can convert the location to a number used for sorting
+ trait_location_value = convert_location_to_value(self.chr, self.mb)
+ #try:
+ # trait_location_value = int(self.chr)*1000 + self.mb
+ #except ValueError:
+ # if self.chr.upper() == 'X':
+ # trait_location_value = 20*1000 + self.mb
+ # else:
+ # trait_location_value = (ord(str(self.chr).upper()[0])*1000 +
+ # self.mb)
+
+ #ZS: Put this in function currently called "convert_location_to_value"
+ self.location_repr = 'Chr%s: %.6f' % (self.chr, float(self.mb))
+ self.location_value = trait_location_value
+
if get_qtl_info:
+ #LRS and its location
+ self.LRS_score_repr = "N/A"
+ self.LRS_score_value = 0
+ self.LRS_location_repr = "N/A"
+ self.LRS_location_value = 1000000
if self.dataset.type == 'ProbeSet' and not self.cellid:
query = """
SELECT
@@ -399,12 +430,8 @@ class GeneralTrait(object):
ProbeSetXRef.ProbeSetFreezeId ={}
""".format(self.name, self.dataset.id)
trait_qtl = g.db.execute(query).fetchone()
- #self.cursor.execute(query)
- #trait_qtl = self.cursor.fetchone()
if trait_qtl:
- print("trait_qtl:", trait_qtl)
self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl
- print("self.locus:", self.locus)
if self.locus:
query = """
select Geno.Chr, Geno.Mb from Geno, Species
@@ -417,9 +444,9 @@ class GeneralTrait(object):
self.locus_chr = result[0]
self.locus_mb = result[1]
else:
- self.locus = self.locus_chr = self.locus_mb = ""
+ self.locus = self.locus_chr = self.locus_mb = self.additive = ""
else:
- self.locus = self.locus_chr = self.locus_mb = ""
+ self.locus = self.locus_chr = self.locus_mb = self.additive = ""
else:
self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = self.additive = ""
@@ -437,8 +464,38 @@ class GeneralTrait(object):
""", (self.name, self.dataset.id)).fetchone()
if trait_qtl:
self.locus, self.lrs, self.additive = trait_qtl
+ if self.locus:
+ query = """
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = '{}' and
+ Geno.Name = '{}' and
+ Geno.SpeciesId = Species.Id
+ """.format(self.dataset.group.species, self.locus)
+ result = g.db.execute(query).fetchone()
+ if result:
+ self.locus_chr = result[0]
+ self.locus_mb = result[1]
+ else:
+ self.locus = self.locus_chr = self.locus_mb = self.additive = ""
+ else:
+ self.locus = self.locus_chr = self.locus_mb = self.additive = ""
else:
self.locus = self.lrs = self.additive = ""
+
+ if (self.dataset.type == 'Publish' or self.dataset.type == "ProbeSet") and self.locus_chr != "" and self.locus_mb != "":
+ #XZ: LRS_location_value is used for sorting
+ try:
+ LRS_location_value = int(self.locus_chr)*1000 + float(self.locus_mb)
+ except:
+ if self.locus_chr.upper() == 'X':
+ LRS_location_value = 20*1000 + float(self.locus_mb)
+ else:
+ LRS_location_value = ord(str(self.locus_chr).upper()[0])*1000 + float(self.locus_mb)
+
+ self.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (self.locus_chr, float(self.locus_mb))
+ if self.lrs != "":
+ self.LRS_score_repr = LRS_score_repr = '%3.1f' % self.lrs
+ self.LRS_score_value = LRS_score_value = self.lrs
else:
raise KeyError, `self.name`+' information is not found in the database.'
@@ -646,7 +703,17 @@ class GeneralTrait(object):
ZValue = ZValue*sqrt(self.overlap-3)
self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
-
+def convert_location_to_value(chromosome, mb):  # Fold a (chromosome, mb) position into one number; the call site's comments say it is used for sorting.
+ try:  # int()/float() raise ValueError for strings they cannot parse
+ location_value = int(chromosome)*1000 + float(mb)  # numeric chromosome: chromosome*1000 + mb
+ except ValueError:
+ if chromosome.upper() == 'X':  # case-insensitive match on "X"
+ location_value = 20*1000 + float(mb)  # "X" is valued as if it were chromosome 20
+ else:
+ location_value = (ord(str(chromosome).upper()[0])*1000 +  # any other name: code point of its first upper-cased character, times 1000
+ float(mb))
+
+ return location_value  # int-or-ord component * 1000 plus float(mb), so the result is a float
@app.route("/trait/get_sample_data")
def get_sample_data():