import string
from htmlgen import HTMLgen2 as HT
import webqtlConfig
from webqtlCaseData import webqtlCaseData
from webqtlDataset import webqtlDataset
from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from __future__ import print_function, division
from pprint import pformat as pf
class webqtlTrait:
"""
Trait class defines a trait in webqtl, can be either Microarray,
Published phenotype, genotype, or user input trait
"""
def __init__(self, cursor = None, **kw):
self.cursor = cursor
self.db = None # database object
self.name = '' # Trait ID, ProbeSet ID, Published ID, etc.
self.cellid = ''
self.identification = 'un-named trait'
self.riset = ''
self.haveinfo = 0
self.sequence = '' # Blat sequence, available for ProbeSet
self.data = {}
for name, value in kw.items():
if self.__dict__.has_key(name):
setattr(self, name, value)
elif name == 'fullname':
name2 = value.split("::")
if len(name2) == 2:
self.db, self.name = name2
elif len(name2) == 3:
self.db, self.name, self.cellid = name2
else:
raise KeyError, `value` + ' parameter format error.'
else:
raise KeyError, `name`+' not a valid parameter for this class.'
if self.db and type(self.db) == type("1"):
assert self.cursor
self.db = webqtlDataset(self.db, self.cursor)
#if self.db == None, not from a database
if self.db:
if self.db.type == "Temp":
self.cursor.execute('''
SELECT
InbredSet.Name
FROM
InbredSet, Temp
WHERE
Temp.InbredSetId = InbredSet.Id AND
Temp.Name = "%s"
''' % self.name)
self.riset = self.cursor.fetchone()[0]
else:
self.riset = self.db.getRISet()
#
# In ProbeSet, there are maybe several annotations match one sequence
# so we need use sequence(BlatSeq) as the identification, when we update
# one annotation, we update the others who match the sequence also.
#
# Hongqiang Li, 3/3/2008
#
#XZ, 05/08/2009: This block is not neccessary. We can add 'BlatSeq' into disfield.
# The variable self.sequence should be changed to self.BlatSeq
# It also should be changed in other places where it are used.
if self.db:
if self.db.type == 'ProbeSet':
query = '''
SELECT
ProbeSet.BlatSeq
FROM
ProbeSet, ProbeSetFreeze, ProbeSetXRef
WHERE
ProbeSet.Id=ProbeSetXRef.ProbeSetId and
ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
ProbeSet.Name = "%s" and
ProbeSetFreeze.Name = "%s"
''' % (self.name, self.db.name)
self.cursor.execute(query)
self.sequence = self.cursor.fetchone()[0]
def getName(self):
str = ""
if self.db and self.name:
str = "%s::%s" % (self.db, self.name)
if self.cellid:
str += "::" + self.cellid
else:
str = self.description
return str
#
# when user enter a trait or GN generate a trait, user want show the name
# not the name that generated by GN randomly, the two follow function are
# used to give the real name and the database. displayName() will show the
# database also, getGivenName() just show the name.
# For other trait, displayName() as same as getName(), getGivenName() as
# same as self.name
#
# Hongqiang 11/29/07
#
def getGivenName(self):
str = self.name
if self.db and self.name:
if self.db.type=='Temp':
self.cursor.execute('SELECT description FROM Temp WHERE Name=%s',self.name)
desc = self.cursor.fetchone()[0]
if desc.__contains__('PCA'):
desc = desc[desc.rindex(':')+1:].strip()
else:
desc = desc[:desc.index('entered')].strip()
str = desc
return str
def displayName(self):
str = ""
if self.db and self.name:
if self.db.type=='Temp':
desc = self.description
if desc.__contains__('PCA'):
desc = desc[desc.rindex(':')+1:].strip()
else:
desc = desc[:desc.index('entered')].strip()
str = "%s::%s" % (self.db, desc)
else:
str = "%s::%s" % (self.db, self.name)
if self.cellid:
str += "::" + self.cellid
else:
str = self.description
return str
#def __str__(self):
# #return "%s %s" % (self.getName(), self.riset)
# return self.getName()
__str__ = getName
__repr__ = __str__
def exportData(self, strainlist, type="val"):
"""
export data according to strainlist
mostly used in calculating correlation
"""
result = []
for strain in strainlist:
if self.data.has_key(strain):
if type=='val':
result.append(self.data[strain].val)
elif type=='var':
result.append(self.data[strain].var)
elif type=='N':
result.append(self.data[strain].N)
else:
raise KeyError, `type`+' type is incorrect.'
else:
result.append(None)
return result
def exportInformative(self, incVar=0):
"""
export informative strain
mostly used in qtl regression
"""
strains = []
vals = []
vars = []
for strain, value in self.data.items():
if value.val != None:
if not incVar or value.var != None:
strains.append(strain)
vals.append(value.val)
vars.append(value.var)
return strains, vals, vars
#
# In ProbeSet, there are maybe several annotations match one sequence
# so we need use sequence(BlatSeq) as the identification, when we update
# one annotation, we update the others who match the sequence also.
#
# Hongqiang Li, 3/3/2008
#
def getSequence(self):
assert self.cursor
if self.db.type == 'ProbeSet':
query = '''
SELECT
ProbeSet.BlatSeq
FROM
ProbeSet, ProbeSetFreeze, ProbeSetXRef
WHERE
ProbeSet.Id=ProbeSetXRef.ProbeSetId and
ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
ProbeSet.Name = %s
ProbeSetFreeze.Name = %s
''' , (self.name, self.db.name)
self.cursor.execute(query)
results = self.fetchone()
return results[0]
def retrieveData(self, strainlist=[]):
assert self.db and self.cursor
debug_file = open("/home/zas1024/gn/web/traitlist_debug.txt", "w")
debug_file.write("strianlist is:" + strainlist + "\n")
if self.db.type == 'Temp':
query = '''
SELECT
Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
FROM
TempData, Temp, Strain
WHERE
TempData.StrainId = Strain.Id AND
TempData.Id = Temp.DataId AND
Temp.name = '%s'
Order BY
Strain.Name
''' % self.name
#XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE
elif self.db.type == 'Publish':
query = '''
SELECT
Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
FROM
(PublishData, Strain, PublishXRef, PublishFreeze)
left join PublishSE on
(PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
left join NStrain on
(NStrain.DataId = PublishData.Id AND
NStrain.StrainId = PublishData.StrainId)
WHERE
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
Order BY
Strain.Name
''' % (self.name, self.db.id)
#XZ, 03/02/2009: Xiaodong changed Data to ProbeData, SE to ProbeSE
elif self.cellid:
#Probe Data
query = '''
SELECT
Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id
FROM
(ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef,
Strain, Probe, ProbeSet)
left join ProbeSE on
(ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
WHERE
Probe.Name = '%s' AND ProbeSet.Name = '%s' AND
Probe.ProbeSetId = ProbeSet.Id AND
ProbeXRef.ProbeId = Probe.Id AND
ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND
ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
ProbeSetFreeze.Name = '%s' AND
ProbeXRef.DataId = ProbeData.Id AND
ProbeData.StrainId = Strain.Id
Order BY
Strain.Name
''' % (self.cellid, self.name, self.db.name)
#XZ, 03/02/2009: Xiaodong added this block for ProbeSetData and ProbeSetSE
elif self.db.type == 'ProbeSet':
#ProbeSet Data
query = '''
SELECT
Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
FROM
(ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
left join ProbeSetSE on
(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
WHERE
ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
ProbeSetFreeze.Name = '%s' AND
ProbeSetXRef.DataId = ProbeSetData.Id AND
ProbeSetData.StrainId = Strain.Id
Order BY
Strain.Name
''' % (self.name, self.db.name)
#XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE
else:
#Geno Data
#XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search.
query = '''
SELECT
Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
FROM
(GenoData, GenoFreeze, Strain, Geno, GenoXRef)
left join GenoSE on
(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
WHERE
Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
GenoXRef.GenoFreezeId = GenoFreeze.Id AND
GenoFreeze.Name = '%s' AND
GenoXRef.DataId = GenoData.Id AND
GenoData.StrainId = Strain.Id
Order BY
Strain.Name
''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.db.riset), self.name, self.db.name)
self.cursor.execute(query)
results = self.cursor.fetchall()
self.data.clear()
if results:
self.mysqlid = results[0][-1]
if strainlist:
for item in results:
if item[0] in strainlist:
val = item[1]
if val != None:
var = item[2]
ndata = None
if self.db.type in ('Publish', 'Temp'):
ndata = item[3]
self.data[item[0]] = webqtlCaseData(val, var, ndata)
#end for
else:
for item in results:
val = item[1]
if val != None:
var = item[2]
ndata = None
if self.db.type in ('Publish', 'Temp'):
ndata = item[3]
self.data[item[0]] = webqtlCaseData(val, var, ndata)
#end for
#end if
else:
pass
debug_file.write("self.data is:", pf(self.data) + "\n")
def keys(self):
return self.__dict__.keys()
def has_key(self, key):
return self.__dict__.has_key(key)
def items(self):
return self.__dict__.items()
def retrieveInfo(self, QTL = None):
assert self.db and self.cursor
if self.db.type == 'Publish':
#self.db.DisField = ['Name','PubMed_ID','Phenotype','Abbreviation','Authors','Title',\
# 'Abstract', 'Journal','Volume','Pages','Month','Year','Sequence',\
# 'Units', 'comments']
query = '''
SELECT
PublishXRef.Id, Publication.PubMed_ID,
Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
Publication.Authors, Publication.Title, Publication.Abstract,
Publication.Journal, Publication.Volume, Publication.Pages,
Publication.Month, Publication.Year, PublishXRef.Sequence,
Phenotype.Units, PublishXRef.comments
FROM
PublishXRef, Publication, Phenotype, PublishFreeze
WHERE
PublishXRef.Id = %s AND
Phenotype.Id = PublishXRef.PhenotypeId AND
Publication.Id = PublishXRef.PublicationId AND
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishFreeze.Id =%s
''' % (self.name, self.db.id)
#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
#XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
elif self.db.type == 'ProbeSet':
disfieldString = string.join(self.db.disfield,',ProbeSet.')
disfieldString = 'ProbeSet.' + disfieldString
query = """
SELECT %s
FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
WHERE
ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
ProbeSetFreeze.Name = '%s' AND
ProbeSet.Name = '%s'
""" % (disfieldString, self.db.name, self.name)
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.db.type == 'Geno':
disfieldString = string.join(self.db.disfield,',Geno.')
disfieldString = 'Geno.' + disfieldString
query = """
SELECT %s
FROM Geno, GenoFreeze, GenoXRef
WHERE
GenoXRef.GenoFreezeId = GenoFreeze.Id AND
GenoXRef.GenoId = Geno.Id AND
GenoFreeze.Name = '%s' AND
Geno.Name = '%s'
""" % (disfieldString, self.db.name, self.name)
else: #Temp type
query = 'SELECT %s FROM %s WHERE Name = "%s"' % \
(string.join(self.db.disfield,','), self.db.type, self.name)
self.cursor.execute(query)
traitInfo = self.cursor.fetchone()
if traitInfo:
self.haveinfo = 1
#XZ: assign SQL query result to trait attributes.
for i, field in enumerate(self.db.disfield):
setattr(self, field, traitInfo[i])
if self.db.type == 'Publish':
self.confidential = 0
if self.pre_publication_description and not self.pubmed_id:
self.confidential = 1
self.homologeneid = None
if self.db.type == 'ProbeSet' and self.riset and self.geneid:
#XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
#XZ: So I have to test if geneid is number before execute the query.
#XZ: The geneid values in database should be cleaned up.
try:
junk = float(self.geneid)
geneidIsNumber = 1
except:
geneidIsNumber = 0
if geneidIsNumber:
query = """
SELECT
HomologeneId
FROM
Homologene, Species, InbredSet
WHERE
Homologene.GeneId =%s AND
InbredSet.Name = '%s' AND
InbredSet.SpeciesId = Species.Id AND
Species.TaxonomyId = Homologene.TaxonomyId
""" % (self.geneid, self.riset)
self.cursor.execute(query)
result = self.cursor.fetchone()
else:
result = None
if result:
self.homologeneid = result[0]
if QTL:
if self.db.type == 'ProbeSet' and not self.cellid:
query = '''
SELECT
ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean
FROM
ProbeSetXRef, ProbeSet
WHERE
ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
ProbeSet.Name = "%s" AND
ProbeSetXRef.ProbeSetFreezeId =%s
''' % (self.name, self.db.id)
self.cursor.execute(query)
traitQTL = self.cursor.fetchone()
if traitQTL:
self.locus, self.lrs, self.pvalue, self.mean = traitQTL
else:
self.locus = self.lrs = self.pvalue = self.mean = ""
if self.db.type == 'Publish':
query = '''
SELECT
PublishXRef.Locus, PublishXRef.LRS
FROM
PublishXRef, PublishFreeze
WHERE
PublishXRef.Id = %s AND
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishFreeze.Id =%s
''' % (self.name, self.db.id)
self.cursor.execute(query)
traitQTL = self.cursor.fetchone()
if traitQTL:
self.locus, self.lrs = traitQTL
else:
self.locus = self.lrs = ""
else:
raise KeyError, `self.name`+' information is not found in the database.'
def genHTML(self, formName = "", dispFromDatabase=0, privilege="guest", userName="Guest", authorized_users=""):
if not self.haveinfo:
self.retrieveInfo()
if self.db.type == 'Publish':
PubMedLink = ""
if self.pubmed_id:
PubMedLink = HT.Href(text="PubMed %d : " % self.pubmed_id,
target = "_blank", url = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id)
else:
PubMedLink = HT.Span("Unpublished : ", Class="fs15")
if formName:
setDescription2 = HT.Href(url="javascript:showDatabase3('%s','%s','%s','')" %
(formName, self.db.name, self.name), Class = "fs14")
else:
setDescription2 = HT.Href(url="javascript:showDatabase2('%s','%s','')" %
(self.db.name,self.name), Class = "fs14")
if self.confidential and not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=privilege, userName=userName, authorized_users=authorized_users):
setDescription2.append('RecordID/%s - %s' % (self.name, self.pre_publication_description))
else:
setDescription2.append('RecordID/%s - %s' % (self.name, self.post_publication_description))
#XZ 03/26/2011: Xiaodong comment out the following two lins as Rob asked. Need to check with Rob why in PublishXRef table, there are few row whose Sequence > 1.
#if self.sequence > 1:
# setDescription2.append(' btach %d' % self.sequence)
if self.authors:
a1 = string.split(self.authors,',')[0]
while a1[0] == '"' or a1[0] == "'" :
a1 = a1[1:]
setDescription2.append(' by ')
setDescription2.append(HT.Italic('%s, and colleagues' % a1))
setDescription = HT.Span(PubMedLink, setDescription2)
elif self.db.type == 'Temp':
setDescription = HT.Href(text="%s" % (self.description),url="javascript:showDatabase2\
('%s','%s','')" % (self.db.name,self.name), Class = "fs14")
setDescription = HT.Span(setDescription)
elif self.db.type == 'Geno': # Genome DB only available for single search
if formName:
setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
'%2.3f' % self.mb),url="javascript:showDatabase3('%s','%s','%s','')" % \
(formName, self.db.name, self.name), Class = "fs14")
else:
setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
'%2.3f' % self.mb),url="javascript:showDatabase2('%s','%s','')" % \
(self.db.name,self.name), Class = "fs14")
setDescription = HT.Span(setDescription)
else:
if self.cellid:
if formName:
setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name, self.cellid),url=\
"javascript:showDatabase3('%s','%s','%s','%s')" % (formName, self.db.name,self.name,self.cellid), \
Class = "fs14")
else:
setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name,self.cellid),url=\
"javascript:showDatabase2('%s','%s','%s')" % (self.db.name,self.name,self.cellid), \
Class = "fs14")
else:
if formName:
setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
"javascript:showDatabase3('%s','%s','%s','')" % (formName, self.db.name,self.name), \
Class = "fs14")
else:
setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
"javascript:showDatabase2('%s','%s','')" % (self.db.name,self.name), \
Class = "fs14")
if self.symbol and self.chr and self.mb:
setDescription.append(' [')
setDescription.append(HT.Italic('%s' % self.symbol,Class="cdg fwb"))
setDescription.append(' on Chr %s @ %s Mb]' % (self.chr,self.mb))
if self.description:
setDescription.append(': %s' % self.description)
if self.probe_target_description:
setDescription.append('; %s' % self.probe_target_description)
setDescription = HT.Span(setDescription)
if self.db.type != 'Temp' and dispFromDatabase:
setDescription.append( ' --- FROM : ')
setDescription.append(self.db.genHTML(Class='cori'))
return setDescription