about summary refs log tree commit diff
path: root/web/webqtl/base/webqtlTrait.py
diff options
context:
space:
mode:
Diffstat (limited to 'web/webqtl/base/webqtlTrait.py')
-rw-r--r--web/webqtl/base/webqtlTrait.py581
1 files changed, 581 insertions, 0 deletions
diff --git a/web/webqtl/base/webqtlTrait.py b/web/webqtl/base/webqtlTrait.py
new file mode 100644
index 00000000..f5051e45
--- /dev/null
+++ b/web/webqtl/base/webqtlTrait.py
@@ -0,0 +1,581 @@
+import string
+
+from htmlgen import HTMLgen2 as HT
+
+import webqtlConfig
+from webqtlCaseData import webqtlCaseData
+from webqtlDataset import webqtlDataset
+from dbFunction import webqtlDatabaseFunction
+from utility import webqtlUtil
+
+
+class webqtlTrait:
+	"""
+	Trait class defining a single trait in webqtl: a microarray (ProbeSet or
+	Probe) trait, a published phenotype, a genotype, or a user-input trait.
+	"""
+
+	def __init__(self, cursor = None, **kw):
+		self.cursor = cursor
+		self.db = None			# database object
+		self.name = ''			# Trait ID, ProbeSet ID, Published ID, etc.
+		self.cellid = ''
+		self.identification = 'un-named trait'
+		self.riset = ''
+		self.haveinfo = 0
+		self.sequence = ''		# Blat sequence, available for ProbeSet
+		self.data = {}
+		for name, value in kw.items():
+			if self.__dict__.has_key(name):
+				setattr(self, name, value)
+			elif name == 'fullname':
+				name2 = value.split("::")
+				if len(name2) == 2:
+					self.db, self.name = name2
+				elif len(name2) == 3:
+					self.db, self.name, self.cellid = name2
+				else:
+					raise KeyError, `value` + ' parameter format error.'
+			else:
+				raise KeyError, `name`+' not a valid parameter for this class.'
+		
+		if self.db and type(self.db) == type("1"):
+			assert self.cursor
+			self.db = webqtlDataset(self.db, self.cursor)
+
+		#if self.db == None, not from a database
+		if self.db:
+			if self.db.type == "Temp":
+				self.cursor.execute('''
+					SELECT 
+						InbredSet.Name 
+					FROM 
+						InbredSet, Temp 
+					WHERE 
+						Temp.InbredSetId = InbredSet.Id AND 
+						Temp.Name = "%s"
+				''' % self.name)
+				self.riset = self.cursor.fetchone()[0]
+			else:	
+				self.riset = self.db.getRISet()
+
+		#
+		# In ProbeSet, there are maybe several annotations match one sequence
+		# so we need use sequence(BlatSeq) as the identification, when we update
+		# one annotation, we update the others who match the sequence also.
+		#
+		# Hongqiang Li, 3/3/2008
+		#
+
+                #XZ, 05/08/2009: This block is not neccessary. We can add 'BlatSeq' into disfield.
+                # The variable self.sequence should be changed to self.BlatSeq
+                # It also should be changed in other places where it are used.
+
+		if self.db:
+			if self.db.type == 'ProbeSet':
+				query = '''
+					SELECT 
+						ProbeSet.BlatSeq
+					FROM
+						ProbeSet, ProbeSetFreeze, ProbeSetXRef
+					WHERE
+						ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+						ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
+						ProbeSet.Name = "%s" and
+						ProbeSetFreeze.Name = "%s"
+				''' % (self.name, self.db.name)
+				self.cursor.execute(query)
+				self.sequence = self.cursor.fetchone()[0]
+
+	
+	def getName(self):
+		str = ""
+		if self.db and self.name:
+			str = "%s::%s" % (self.db, self.name)
+			if self.cellid:
+				str += "::" + self.cellid
+		else:
+			str = self.description
+		return str
+
+	#
+	# When a user enters a trait, or GN generates one, the user wants to see the
+	# name they gave it, not the name GN generated randomly. The two following
+	# functions return that real name: displayName() also shows the database,
+	# getGivenName() shows just the name.
+	# For other traits, displayName() is the same as getName() and
+	# getGivenName() is the same as self.name.
+	#
+	# Hongqiang 11/29/07
+	#
+	def getGivenName(self):
+		str = self.name
+		if self.db and self.name:
+			if self.db.type=='Temp':
+				self.cursor.execute('SELECT description FROM Temp WHERE Name=%s',self.name)
+				desc = self.cursor.fetchone()[0]
+				if desc.__contains__('PCA'):
+					desc = desc[desc.rindex(':')+1:].strip()
+				else:
+					desc = desc[:desc.index('entered')].strip()
+				str = desc
+		return str
+
+	def displayName(self):
+		str = ""
+		if self.db and self.name:
+			if self.db.type=='Temp':
+				desc = self.description
+				if desc.__contains__('PCA'):
+					desc = desc[desc.rindex(':')+1:].strip()
+				else:
+					desc = desc[:desc.index('entered')].strip()
+				str = "%s::%s" % (self.db, desc)
+			else:
+				str = "%s::%s" % (self.db, self.name)
+				if self.cellid:
+					str += "::" + self.cellid
+		else:
+			str = self.description
+	
+		return str
+
+	
+	#def __str__(self):
+	#	#return "%s %s" % (self.getName(), self.riset)
+	#	return self.getName()
+	__str__ = getName
+	__repr__ = __str__
+
+	def exportData(self, strainlist, type="val"):
+		"""
+			export data according to strainlist
+			mostly used in calculating correlation
+		"""
+		result = []
+		for strain in strainlist:
+			if self.data.has_key(strain):
+				if type=='val':
+					result.append(self.data[strain].val)
+				elif type=='var':
+					result.append(self.data[strain].var)
+				elif type=='N':
+					result.append(self.data[strain].N)
+				else:
+					raise KeyError, `type`+' type is incorrect.'
+			else:
+				result.append(None)
+		return result
+		
+	def exportInformative(self, incVar=0):
+		"""
+			export informative strain
+			mostly used in qtl regression
+		"""
+		strains = []
+		vals = []
+		vars = []
+		for strain, value in self.data.items():
+			if value.val != None:
+				if not incVar or value.var != None:
+					strains.append(strain)
+					vals.append(value.val)
+					vars.append(value.var)
+		return 	strains, vals, vars
+
+
+	#
+	# In ProbeSet, several annotations may match one sequence, so we use the
+	# sequence (BlatSeq) as the identification; when we update one annotation,
+	# we also update the others that match the same sequence.
+	#
+	# Hongqiang Li, 3/3/2008
+	#
+	def getSequence(self):
+		assert self.cursor
+		if self.db.type == 'ProbeSet':
+			query = '''
+					SELECT 
+						ProbeSet.BlatSeq
+					FROM
+						ProbeSet, ProbeSetFreeze, ProbeSetXRef
+					WHERE
+						ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+						ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
+						ProbeSet.Name = %s
+						ProbeSetFreeze.Name = %s
+				''' , (self.name, self.db.name)
+			self.cursor.execute(query)
+			results = self.fetchone()
+
+			return results[0]
+			
+			
+		
+	def retrieveData(self, strainlist=[]):
+		assert self.db and self.cursor
+
+		if self.db.type == 'Temp':
+			query = '''
+				SELECT 
+					Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id 
+				FROM 
+					TempData, Temp, Strain 
+				WHERE 
+					TempData.StrainId = Strain.Id AND 
+					TempData.Id = Temp.DataId AND 
+					Temp.name = '%s'
+				Order BY
+					Strain.Name
+				''' % self.name
+		#XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE
+		elif self.db.type == 'Publish': 
+			query = '''
+				SELECT 
+					Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id 
+				FROM 
+					(PublishData, Strain, PublishXRef, PublishFreeze)
+				left join PublishSE on 
+					(PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+				left join NStrain on 
+					(NStrain.DataId = PublishData.Id AND 
+					NStrain.StrainId = PublishData.StrainId) 
+				WHERE 
+					PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND 
+					PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND 
+					PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+				Order BY
+					Strain.Name
+				''' % (self.name, self.db.id)
+
+                #XZ, 03/02/2009: Xiaodong changed Data to ProbeData, SE to ProbeSE
+		elif self.cellid:
+			#Probe Data
+			query = '''
+				SELECT 
+					Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id 
+				FROM 
+					(ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef, 
+					Strain, Probe, ProbeSet)
+				left join ProbeSE on 
+					(ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
+				WHERE 
+					Probe.Name = '%s' AND ProbeSet.Name = '%s' AND 
+					Probe.ProbeSetId = ProbeSet.Id AND 
+					ProbeXRef.ProbeId = Probe.Id AND 
+					ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND 
+					ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND 
+					ProbeSetFreeze.Name = '%s' AND 
+					ProbeXRef.DataId = ProbeData.Id AND 
+					ProbeData.StrainId = Strain.Id
+				Order BY
+					Strain.Name
+				''' % (self.cellid, self.name, self.db.name)
+                #XZ, 03/02/2009: Xiaodong added this block for ProbeSetData and ProbeSetSE
+		elif self.db.type == 'ProbeSet':
+			#ProbeSet Data
+                        query = '''
+                                SELECT
+                                        Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+                                FROM
+                                        (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                                left join ProbeSetSE on
+                                        (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                                WHERE
+                                        ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                                        ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                                        ProbeSetFreeze.Name = '%s' AND
+                                        ProbeSetXRef.DataId = ProbeSetData.Id AND
+                                        ProbeSetData.StrainId = Strain.Id
+                                Order BY
+                                        Strain.Name
+                                ''' % (self.name, self.db.name)
+                #XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE
+		else:
+			#Geno Data
+			#XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search.
+			query = '''
+				SELECT 
+					Strain.Name, GenoData.value, GenoSE.error, GenoData.Id 
+				FROM 
+					(GenoData, GenoFreeze, Strain, Geno, GenoXRef)
+				left join GenoSE on
+					(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+				WHERE 
+					Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+					GenoXRef.GenoFreezeId = GenoFreeze.Id AND 
+					GenoFreeze.Name = '%s' AND 
+					GenoXRef.DataId = GenoData.Id AND 
+					GenoData.StrainId = Strain.Id
+				Order BY
+					Strain.Name
+				''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.db.riset), self.name, self.db.name)
+
+		
+		self.cursor.execute(query)
+		results = self.cursor.fetchall()
+		self.data.clear()
+		if results:
+			self.mysqlid = results[0][-1]
+			if strainlist:
+				for item in results:
+					if item[0] in strainlist:
+						val = item[1]
+						if val != None:
+							var = item[2]
+							ndata = None
+							if self.db.type in ('Publish', 'Temp'):
+								ndata = item[3]
+							self.data[item[0]] = webqtlCaseData(val, var, ndata)
+				#end for
+			else:	
+				for item in results:
+					val = item[1]
+					if val != None:
+						var = item[2]
+						ndata = None
+						if self.db.type in ('Publish', 'Temp'):
+							ndata = item[3]
+						self.data[item[0]] = webqtlCaseData(val, var, ndata)
+				#end for
+			#end if
+		else:
+			pass
+			
+	def keys(self):
+		return self.__dict__.keys()
+	
+	def has_key(self, key):
+		return self.__dict__.has_key(key)
+	
+	def items(self):
+		return self.__dict__.items()
+	
+	def retrieveInfo(self, QTL = None):
+		assert self.db and self.cursor
+		if self.db.type == 'Publish':
+			#self.db.DisField = ['Name','PubMed_ID','Phenotype','Abbreviation','Authors','Title',\
+			#	'Abstract', 'Journal','Volume','Pages','Month','Year','Sequence',\
+			#	'Units', 'comments']
+			query = '''
+				SELECT 
+					PublishXRef.Id, Publication.PubMed_ID, 
+					Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description, 
+					Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, 
+					Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
+					Publication.Authors, Publication.Title, Publication.Abstract, 
+					Publication.Journal, Publication.Volume, Publication.Pages, 
+					Publication.Month, Publication.Year, PublishXRef.Sequence, 
+					Phenotype.Units, PublishXRef.comments 
+				FROM 
+					PublishXRef, Publication, Phenotype, PublishFreeze 
+				WHERE 
+					PublishXRef.Id = %s AND 
+					Phenotype.Id = PublishXRef.PhenotypeId AND 
+					Publication.Id = PublishXRef.PublicationId AND 
+					PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND 
+					PublishFreeze.Id =%s
+				''' % (self.name, self.db.id)
+		#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
+		#XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
+		elif self.db.type == 'ProbeSet':
+			disfieldString = string.join(self.db.disfield,',ProbeSet.')
+			disfieldString = 'ProbeSet.' + disfieldString
+			query = """
+				SELECT %s 
+				FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+				WHERE
+					ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+					ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+					ProbeSetFreeze.Name = '%s' AND
+					ProbeSet.Name = '%s'
+				""" % (disfieldString, self.db.name, self.name)
+		#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
+		# to avoid the problem of same marker name from different species.
+		elif self.db.type == 'Geno':
+			disfieldString = string.join(self.db.disfield,',Geno.')
+			disfieldString = 'Geno.' + disfieldString
+			query = """
+				SELECT %s 
+				FROM Geno, GenoFreeze, GenoXRef
+				WHERE
+					GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+					GenoXRef.GenoId = Geno.Id AND
+					GenoFreeze.Name = '%s' AND
+					Geno.Name = '%s'
+				""" % (disfieldString, self.db.name, self.name)
+		else: #Temp type
+			query = 'SELECT %s FROM %s WHERE Name = "%s"' % \
+				(string.join(self.db.disfield,','), self.db.type, self.name)
+
+		
+		self.cursor.execute(query)
+		traitInfo = self.cursor.fetchone()
+		if traitInfo:
+			self.haveinfo = 1
+
+			#XZ: assign SQL query result to trait attributes.
+			for i, field in enumerate(self.db.disfield):
+				setattr(self, field, traitInfo[i])
+
+			if self.db.type == 'Publish':
+				self.confidential = 0
+				if self.pre_publication_description and not self.pubmed_id:
+					self.confidential = 1
+
+			self.homologeneid = None
+			if self.db.type == 'ProbeSet' and self.riset and self.geneid:
+				#XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
+				#XZ: So I have to test if geneid is number before execute the query.
+				#XZ: The geneid values in database should be cleaned up.
+				try:
+					junk = float(self.geneid)
+					geneidIsNumber = 1
+				except:
+					geneidIsNumber = 0
+
+				if geneidIsNumber:
+					query = """
+						SELECT
+							HomologeneId
+						FROM
+							Homologene, Species, InbredSet
+						WHERE
+							Homologene.GeneId =%s AND
+							InbredSet.Name = '%s' AND
+							InbredSet.SpeciesId = Species.Id AND
+							Species.TaxonomyId = Homologene.TaxonomyId
+						""" % (self.geneid, self.riset)
+					self.cursor.execute(query)
+					result = self.cursor.fetchone()
+				else:
+					result = None
+
+				if result:
+					self.homologeneid = result[0]
+
+			if QTL:
+				if self.db.type == 'ProbeSet' and not self.cellid:
+					query = '''
+						SELECT 
+							ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean 
+						FROM 
+							ProbeSetXRef, ProbeSet
+						WHERE 
+							ProbeSetXRef.ProbeSetId = ProbeSet.Id AND 
+							ProbeSet.Name = "%s" AND
+							ProbeSetXRef.ProbeSetFreezeId =%s
+						''' % (self.name, self.db.id)
+					self.cursor.execute(query)
+					traitQTL = self.cursor.fetchone()
+					if traitQTL:
+						self.locus, self.lrs, self.pvalue, self.mean = traitQTL
+					else:
+						self.locus = self.lrs = self.pvalue = self.mean = ""
+				if self.db.type == 'Publish':
+					query = '''
+						SELECT
+							PublishXRef.Locus, PublishXRef.LRS
+						FROM
+							PublishXRef, PublishFreeze
+						WHERE
+							PublishXRef.Id = %s AND
+							PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+							PublishFreeze.Id =%s
+						''' % (self.name, self.db.id)
+					self.cursor.execute(query)
+					traitQTL = self.cursor.fetchone()
+					if traitQTL:
+						self.locus, self.lrs = traitQTL
+					else:
+						self.locus = self.lrs = ""
+		else:
+			raise KeyError, `self.name`+' information is not found in the database.'
+	
+	def genHTML(self, formName = "", dispFromDatabase=0, privilege="guest", userName="Guest", authorized_users=""):
+		if not self.haveinfo:
+			self.retrieveInfo()
+		
+		if self.db.type == 'Publish':
+			PubMedLink = ""
+	 		if self.pubmed_id:
+	 			PubMedLink = HT.Href(text="PubMed %d : " % self.pubmed_id,
+	 			target = "_blank", url = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id)
+			else:
+				PubMedLink = HT.Span("Unpublished : ", Class="fs15")
+			
+			if formName:
+				setDescription2 = HT.Href(url="javascript:showDatabase3('%s','%s','%s','')" % 
+				(formName, self.db.name, self.name), Class = "fs14")
+			else:
+				setDescription2 = HT.Href(url="javascript:showDatabase2('%s','%s','')" % 
+				(self.db.name,self.name), Class = "fs14")
+	
+			if self.confidential and not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=privilege, userName=userName, authorized_users=authorized_users):
+					setDescription2.append('RecordID/%s - %s' % (self.name, self.pre_publication_description))
+			else:
+				setDescription2.append('RecordID/%s - %s' % (self.name, self.post_publication_description))
+
+			#XZ 03/26/2011: Xiaodong comment out the following two lins as Rob asked. Need to check with Rob why in PublishXRef table, there are few row whose Sequence > 1.
+			#if self.sequence > 1:
+			#	setDescription2.append(' btach %d' % self.sequence)
+			if self.authors:
+				a1 = string.split(self.authors,',')[0]
+				while a1[0] == '"' or a1[0] == "'" :
+					a1 = a1[1:]
+				setDescription2.append(' by ')
+				setDescription2.append(HT.Italic('%s, and colleagues' % a1))
+			setDescription = HT.Span(PubMedLink, setDescription2)
+			
+		elif self.db.type == 'Temp':
+			setDescription = HT.Href(text="%s" % (self.description),url="javascript:showDatabase2\
+			('%s','%s','')" % (self.db.name,self.name), Class = "fs14")
+			setDescription = HT.Span(setDescription)
+			
+		elif self.db.type == 'Geno': # Genome DB only available for single search
+			if formName:
+				setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
+			'%2.3f' % self.mb),url="javascript:showDatabase3('%s','%s','%s','')" % \
+			(formName, self.db.name, self.name), Class = "fs14")
+			else:
+				setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
+			'%2.3f' % self.mb),url="javascript:showDatabase2('%s','%s','')" % \
+			(self.db.name,self.name), Class = "fs14")
+				
+			setDescription = HT.Span(setDescription)
+			
+		else:
+			if self.cellid:
+				if formName:	
+					setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name, self.cellid),url=\
+				"javascript:showDatabase3('%s','%s','%s','%s')" % (formName, self.db.name,self.name,self.cellid), \
+				Class = "fs14")
+				else:	
+					setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name,self.cellid),url=\
+				"javascript:showDatabase2('%s','%s','%s')" % (self.db.name,self.name,self.cellid), \
+				Class = "fs14")
+			else:
+				if formName:
+					setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
+				"javascript:showDatabase3('%s','%s','%s','')" % (formName, self.db.name,self.name), \
+				Class = "fs14")
+				else:
+					setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
+				"javascript:showDatabase2('%s','%s','')" % (self.db.name,self.name), \
+				Class = "fs14")
+			if self.symbol and self.chr and self.mb:
+				setDescription.append(' [')
+				setDescription.append(HT.Italic('%s' % self.symbol,Class="cdg fwb"))
+				setDescription.append(' on Chr %s @ %s Mb]' % (self.chr,self.mb))
+			if self.description:
+				setDescription.append(': %s' % self.description)
+			if self.probe_target_description:
+				setDescription.append('; %s' % self.probe_target_description)
+			setDescription = HT.Span(setDescription)
+			
+		if self.db.type != 'Temp' and dispFromDatabase:
+			setDescription.append( ' --- FROM : ')
+			setDescription.append(self.db.genHTML(Class='cori'))
+		return setDescription
+	
+