Add all the source code to GitHub.

author: root 2012-05-08 18:39:56 -0500
committer: root 2012-05-08 18:39:56 -0500
commit: ea46f42ee640928b92947bfb204c41a482d80937 (patch)
tree: 9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/qtlminer/GeneUtil.py
parent: 056b5253fc3857b0444382aa39944f6344dc1ceb (diff)
download: genenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz
1 files changed, 658 insertions, 0 deletions
diff --git a/web/webqtl/qtlminer/GeneUtil.py b/web/webqtl/qtlminer/GeneUtil.py
new file mode 100755
index 00000000..3ae7f3c0
--- /dev/null
+++ b/web/webqtl/qtlminer/GeneUtil.py
@@ -0,0 +1,658 @@
+import string
+import os
+
+
+from base import webqtlConfig
+
+
+#Return a list of dictionaries, one per gene;
+#each dictionary also holds one sub-dictionary per other species
+def loadGenes(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse'):
+	"""Return the GeneList rows of `species` on `chrName` whose TxStart or TxEnd lies in (startMb, endMb].
+
+	Each row becomes a dict keyed by column name, extended with 'snpCount',
+	'snpDensity', 'GeneLength' (when computable) and one '<otherSpecies>Gene'
+	dict per other species (the same-symbol gene, or {} when there is none).
+	`diffCol` is a pair of strain ids for the BXD SNP count; `webqtlDb` is unused.
+	"""
+	fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', 
+	'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', 
+	'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
+	fieldList = ", ".join(fetchFields)
+	
+	##List All Species in the Gene Table
+	speciesDict = {}
+	cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
+			GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
+	results = cursor.fetchall()
+	for item in results:
+		speciesDict[item[0]] = item[1]
+	
+	##List current Species and other Species
+	speciesId = speciesDict[species]
+	otherSpecies = [[name, specId] for name, specId in speciesDict.items() if name != species]
+
+	cursor.execute("""SELECT %s from GeneList 
+						where 
+					SpeciesId = %d AND Chromosome = '%s' AND
+					((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f))
+					order by txStart
+					""" 
+					% (fieldList, speciesId, chrName, startMb, endMb, startMb, endMb))
+	results = cursor.fetchall()
+	GeneList = []
+
+	if results:
+		for result in results:
+			newdict = {}
+			for j, item in enumerate(fetchFields):
+				newdict[item] = result[j]
+			#count SNPs if possible	
+			if diffCol and species=='mouse':
+				cursor.execute("""
+					select 
+						count(*) from BXDSnpPosition
+					where 
+						Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+						StrainId1 = %d AND StrainId2 = %d
+				""" % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
+				newdict["snpCount"] = cursor.fetchone()[0]
+				newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0
+			else:
+				newdict["snpDensity"] = newdict["snpCount"] = 0
+			
+			try:
+				newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart'])
+			except TypeError:
+				pass
+			
+			#load gene from other Species by the same name
+			for othSpec, othSpecId in otherSpecies:
+				newdict2 = {}
+				
+				cursor.execute("SELECT %s from GeneList where SpeciesId = %d and geneSymbol= '%s' limit 1" % 
+							(fieldList, othSpecId, newdict["GeneSymbol"]))
+				resultsOther = cursor.fetchone()
+				if resultsOther:
+					for j, item in enumerate(fetchFields):
+						newdict2[item] = resultsOther[j]
+							
+					#count SNPs at the same-symbol mouse gene's own position (not the query gene's)
+					if diffCol and othSpec == 'mouse':
+						cursor.execute("""
+							select
+								count(*) from BXDSnpPosition
+							where
+								Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+								StrainId1 = %d AND StrainId2 = %d
+							""" % (newdict2["Chromosome"], newdict2["TxStart"], newdict2["TxEnd"], diffCol[0], diffCol[1]))
+						newdict2["snpCount"] = cursor.fetchone()[0]
+						newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0
+					else:
+						newdict2["snpDensity"] = newdict2["snpCount"] = 0
+						
+					try:
+						newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart'])
+					except TypeError:
+						pass
+						
+				newdict['%sGene' % othSpec] = newdict2
+				
+			GeneList.append(newdict)
+
+	return GeneList
+
+
+
+
+
+
+def loadGenesForQTLminer(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse', databaseA='HC_M2_0606_P', databaseB='HC_M2CB_1205_R', databaseC='Illum_LXS_Hipp_loess0807', str1='C57BL/6J', str2='DBA/2J'):
+	"""Return the genes of `species` on `chrName` in (startMb, endMb], annotated for QTLminer.
+
+	Genes whose TxStart or TxEnd lies in the window become dicts keyed by the
+	GeneList column names, extended with 'snpCount', 'snpDensity', 'GeneLength'
+	(when computable) and one '<otherSpecies>Gene' dict per other species.
+	Each dict also gets KEGG 'pathways'/'pathwaynames' and GO 'goterms' (only
+	when rows are found), 'snpCountmis' (SnpAll rows with Exon='Y' in the gene),
+	'snpCountmissel' (those where strains `str1` and `str2` differ), 'indelCountBXD'
+	and, for mouse, probe-set columns of datasets `databaseA`/`databaseB`/`databaseC`
+	under keys suffixed A/B/C ("--" for other species). The 'y'/'n' flags 'hassnp',
+	'hasexpr', 'hasindel' and 'hascis' are summed into 'score'. `webqtlDb` is unused.
+	"""
+	fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', 
+	'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', 
+	'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
+	fieldList = ", ".join(fetchFields)
+	
+	##List All Species in the Gene Table
+	speciesDict = {}
+	cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
+			GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
+	results = cursor.fetchall()
+	for item in results:
+		speciesDict[item[0]] = item[1]
+	
+	##List current Species and other Species
+	speciesId = speciesDict[species]
+	otherSpecies = [[name, specId] for name, specId in speciesDict.items() if name != species]
+
+	cursor.execute("""SELECT %s from GeneList 
+						where 
+					SpeciesId = %d AND Chromosome = '%s' AND
+					((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f))
+					order by txStart
+					""" 
+					% (fieldList, speciesId, chrName, startMb, endMb, startMb, endMb))
+	results = cursor.fetchall()
+	GeneList = []
+	
+	if results:
+		for result in results:
+			newdict = {}
+			for j, item in enumerate(fetchFields):
+				newdict[item] = result[j]
+
+## get pathways
+
+			cursor.execute("""
+			    select 
+					pathway						
+				FROM
+				    kegg.mmuflat
+				where 
+					gene = '%s' 
+				""" % (newdict["GeneID"]) )
+				
+			resAAA = cursor.fetchall()
+			if resAAA:
+				myFields = ['pathways']
+				for j, item in enumerate(myFields):
+					temp = []
+					for k in resAAA:
+						temp.append(k[j])
+					newdict["pathways"] = temp 
+			
+			cursor.execute("""
+			    select 
+					name						
+				FROM
+				    kegg.mmuflat
+				where 
+					gene = '%s' 
+				""" % (newdict["GeneID"]) )
+				
+			resAAA = cursor.fetchall()
+			if resAAA:
+				myFields = ['pathwaynames']
+				for j, item in enumerate(myFields):
+					temp = []
+					for k in resAAA:
+						temp.append(k[j])
+					newdict["pathwaynames"] = temp 
+
+## get GO terms
+
+			cursor.execute("""
+			    SELECT
+				  distinct go.term.name
+				FROM   go.gene_product
+				  INNER JOIN go.dbxref ON (go.gene_product.dbxref_id=go.dbxref.id)
+				  INNER JOIN go.association ON (go.gene_product.id=go.association.gene_product_id)
+				  INNER JOIN go.term ON (go.association.term_id=go.term.id)
+				WHERE
+				  go.dbxref.xref_key = (select mgi from go.genemgi where gene='%s' limit 1)
+				AND
+				  go.dbxref.xref_dbname = 'MGI'
+				AND
+				  go.term.term_type='biological_process'
+				""" % (newdict["GeneID"]) )
+
+			resAAA = cursor.fetchall()
+			if resAAA:
+				myFields = ['goterms']
+				for j, item in enumerate(myFields):
+					temp = []
+					for k in resAAA:
+						temp.append(k[j])
+					newdict["goterms"] = temp 
+			
+
+
+
+
+
+			newdict["snpDensity"] = newdict["snpCount"] = newdict["snpCountall"] = newdict["snpCountmis"] = newdict["snpCountBXD"] = newdict["snpCountmissel"] = 0
+
+			#count SNPs if possible	
+			if diffCol and species=='mouse':
+				cursor.execute("""
+					select 
+						count(*) from BXDSnpPosition
+					where 
+						Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+						StrainId1 = %d AND StrainId2 = %d
+				""" % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
+				newdict["snpCount"] = cursor.fetchone()[0]
+				newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0
+			else:
+				newdict["snpDensity"] = newdict["snpCount"] = 0
+			
+			try:
+				newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart'])
+			except TypeError:
+				pass
+
+
+
+#self.cursor.execute("SELECT geneSymbol, chromosome, txStart, txEnd from GeneList where SpeciesId= 1 and geneSymbol = %s", opt.geneName)
+
+
+
+			
+			## search with gene name... doesnt matter. it changed to start and end position anyway
+			##self.cursor.execute("SELECT geneSymbol, chromosome, txStart, txEnd from GeneList where SpeciesId= 1 and geneSymbol = %s", newdict["GeneSymbol"])
+
+
+			#count SNPs for all strains
+			cursor.execute("""
+			     SELECT 
+				distinct SnpAll.Id
+			     from 
+			        SnpAll 
+			     where 
+			        SpeciesId = '1' and SnpAll.Chromosome = '%s' AND 
+				    SnpAll.Position >= %2.6f and SnpAll.Position < %2.6f AND
+				    SnpAll.Exon='Y'
+				""" % (newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"]))
+			snpfetch = cursor.fetchall()
+			newdict["snpCountmis"] = len(snpfetch)
+
+## 			# count SNPs for selected strains
+			
+			sql = """SELECT 
+					distinct SnpAll.Id, `%s`, `%s`
+				from 
+					SnpAll, SnpPattern 
+				where 
+					SpeciesId = '1' and SnpAll.Chromosome = '%s' AND 
+					SnpAll.Position >= %2.6f and SnpAll.Position < %2.6f and SnpAll.Id = SnpPattern.SnpId AND 
+					SnpPattern.`%s` != SnpPattern.`%s` AND
+					SnpAll.Exon='Y'
+					""" % (str1, str2, newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"], str1, str2)
+			cursor.execute(sql)
+			ressnp = cursor.fetchall()
+			newdict["snpCountmissel"] = len(ressnp)
+			newdict["hassnp"] = 'n'
+			if len(ressnp)>0 :
+				newdict["hassnp"]= 'y'
+##          ####################################### NEW NEW NEW
+
+
+
+
+
+
+
+			# count Indels for BXD mice
+			cursor.execute("""
+				SELECT 
+				   distinct IndelAll.Name, IndelAll.Chromosome, IndelAll.SourceId, IndelAll.Mb_start,
+				   IndelAll.Mb_end, IndelAll.Strand, IndelAll.Type, IndelAll.Size, IndelAll.InDelSequence,
+				   SnpSource.Name  
+				from 
+				   SnpSource, IndelAll
+				where 
+				   IndelAll.SpeciesId = '1' and IndelAll.Chromosome = '%s' AND 
+				   IndelAll.Mb_start >= %2.6f and IndelAll.Mb_start < (%2.6f+.0010) AND
+				   SnpSource.Id = IndelAll.SourceId 
+				   order by IndelAll.Mb_start
+				""" % (newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"]))
+				
+			ressnp = cursor.fetchall()
+			newdict["indelCountBXD"] = len(ressnp)
+			newdict["hasindel"] = 'n'
+			newdict["hasexpr"] = 'n'
+			newdict["hascis"] = 'n'
+			newdict["score"] = 0
+			if len(ressnp)>0 :
+				newdict["hasindel"]= 'y'
+
+## #			cursor.execute("""
+## #				select 
+## #					Name from ProbeSet
+## #				where 
+## #					GeneId = '%s' AND ChipId=4 limit 1
+## #			""" % (newdict["GeneID"]))
+## #			if species=='mouse':
+## #				cursor.execute("""
+## #					select 
+## #						Name from ProbeSet
+## #					where 
+## #						GeneId = '%s' AND ChipId=4
+## #				""" % (newdict["GeneID"]))
+## #				results = cursor.fetchall()
+## #				psets = []
+## #				for item in results:
+## #					psets.append(item)
+## #				newdict["probeset"] = psets 
+## #				
+## #			else:
+## #				newdict["probeset"] = "empty"
+
+
+
+
+			if species=='mouse':
+				cursor.execute("""
+					select 
+						distinct 0,
+						ProbeSet.Name as TNAME,
+						round(ProbeSetXRef.Mean,1) as TMEAN,
+						round(ProbeSetXRef.LRS,1) as TLRS,
+						ProbeSet.Chr_num as TCHR_NUM,
+						ProbeSet.Mb as TMB,
+						ProbeSet.Symbol as TSYMBOL,
+						ProbeSet.name_num as TNAME_NUM
+						FROM  ProbeSetXRef, ProbeSetFreeze, ProbeSet
+					where 
+						( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+						alias,GenbankId,UniGeneId, Probe_Target_Description)
+						AGAINST ('%s' IN BOOLEAN MODE) )
+						and ProbeSet.symbol = '%s'
+						and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+						and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+						and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+				""" % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseA))
+				resA = cursor.fetchall()
+				
+				if resA:
+					myFields = ['dummyA','probesetA','meanA','newlrsA','probesetchrA','probesetmbA','probesetsymbolA','probesetnamenumA']
+
+#					fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
+					#fpText.write("newdictgeneid  '%s'  \n" % newdict["GeneId"])
+					for j, item in enumerate(myFields):
+						temp = []
+						for k in resA:
+							#							fpText.write("j: result:  '%s'  \n" % k[j])
+							temp.append(k[j])
+						newdict[item] = temp 
+					#					fpText.close()
+
+
+					# put probesetcisA here
+				
+					cursor.execute("""
+					select 
+						distinct 0,
+						if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000  and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000  ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
+						FROM  Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
+					where 
+						( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+						alias,GenbankId,UniGeneId, Probe_Target_Description)
+						AGAINST ('%s' IN BOOLEAN MODE) )
+						and ProbeSet.symbol = '%s'
+						and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+						and Geno.SpeciesId=1 #XZ: I add this line to speed up query
+						and ProbeSetXRef.Locus = Geno.name
+						and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+						and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+						""" % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseA))
+
+					resA2 = cursor.fetchall()
+					if resA2:
+						myFields = ['dummyA2','probesetcisA']
+						for j, item in enumerate(myFields):
+							temp = []
+							for k in resA2:
+								#							fpText.write("j: result:  '%s'  \n" % k[j])
+								temp.append(k[j])
+							newdict[item] = temp 
+					else:
+						newdict['probesetcisA'] = ''
+
+
+
+					# specially for this dataset only
+					newdict["hasexpr"] = 'n'
+					if len(newdict["meanA"])>0:
+						for mym in newdict["meanA"]:
+							if mym>8:
+								newdict["hasexpr"] = 'y'
+
+					# specially for this dataset only
+					newdict["hascis"] = 'n'
+					if len(newdict["probesetcisA"])>0:
+						for mym in newdict["probesetcisA"]:
+							if mym != 'no':
+								newdict["hascis"] = 'y'
+			
+			else:
+				myFields = ['dummyA','probesetA','meanA','newlrsA','probesetchrA','probesetmbA','probesetsymbolA','probesetnamenumA', 'probesetcisA']
+				for j, item in enumerate(myFields):
+					newdict[item] = "--"
+
+				# specially for this dataset only
+				newdict["hasexpr"] = 'n'
+				newdict["hascis"] = 'n'
+				newdict["score"] = 0
+
+##########################  FOR B
+
+			newdict["score"] = 0
+			if newdict["hassnp"] == 'y':
+				newdict["score"] = newdict["score"] + 1					
+			if newdict["hasexpr"] == 'y':
+				newdict["score"] = newdict["score"] + 1					
+			if newdict["hasindel"] == 'y':
+				newdict["score"] = newdict["score"] + 1					
+			if newdict["hascis"] == 'y':
+				newdict["score"] = newdict["score"] + 1					
+							
+							
+					
+			if species=='mouse':
+				cursor.execute("""
+					select 
+						distinct 0,
+						ProbeSet.Name as TNAME,
+						round(ProbeSetXRef.Mean,1) as TMEAN,
+						round(ProbeSetXRef.LRS,1) as TLRS,
+						ProbeSet.Chr_num as TCHR_NUM,
+						ProbeSet.Mb as TMB,
+						ProbeSet.Symbol as TSYMBOL,
+						ProbeSet.name_num as TNAME_NUM
+						FROM  ProbeSetXRef, ProbeSetFreeze, ProbeSet
+					where 
+						( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+						alias,GenbankId,UniGeneId, Probe_Target_Description)
+						AGAINST ('%s' IN BOOLEAN MODE) )
+						and ProbeSet.symbol = '%s'
+						and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+						and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+						and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+				""" % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseB))
+
+				resB = cursor.fetchall()
+				if resB:
+					myFields = ['dummyB','probesetB','meanB','newlrsB','probesetchrB','probesetmbB','probesetsymbolB','probesetnamenumB']
+
+#					fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
+					#fpText.write("newdictgeneid  '%s'  \n" % newdict["GeneId"])
+					for j, item in enumerate(myFields):
+						temp = []
+						for k in resB:
+							#							fpText.write("j: result:  '%s'  \n" % k[j])
+							temp.append(k[j])
+						newdict[item] = temp 
+					#					fpText.close()
+
+
+					# put probesetcisB here
+					cursor.execute("""
+					select 
+						distinct 0,
+						if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000  and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000  ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
+						FROM  Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
+					where 
+						( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+						alias,GenbankId,UniGeneId, Probe_Target_Description)
+						AGAINST ('%s' IN BOOLEAN MODE) )
+						and ProbeSet.symbol = '%s'
+						and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+						and Geno.SpeciesId=1 #XZ: I add this line to speed up query
+						and ProbeSetXRef.Locus = Geno.name
+						and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+						and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+						""" % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseB))
+
+					resB2 = cursor.fetchall()
+					if resB2:
+						myFields = ['dummyB2','probesetcisB']
+						for j, item in enumerate(myFields):
+							temp = []
+							for k in resB2:
+								#							fpText.write("j: result:  '%s'  \n" % k[j])
+								temp.append(k[j])
+							newdict[item] = temp 
+					else:
+						newdict['probesetcisB'] = ''
+
+				
+			else:
+				myFields = ['dummyB','probesetB','meanB','newlrsB','probesetchrB','probesetmbB','probesetsymbolB','probesetnamenumB', 'probesetcisB']
+				for j, item in enumerate(myFields):
+					newdict[item] = "--"
+
+
+
+##########################
+
+
+##########################  FOR C
+
+					
+			if species=='mouse':
+				cursor.execute("""
+					select 
+						distinct 0,
+						ProbeSet.Name as TNAME,
+						round(ProbeSetXRef.Mean,1) as TMEAN,
+						round(ProbeSetXRef.LRS,1) as TLRS,
+						ProbeSet.Chr_num as TCHR_NUM,
+						ProbeSet.Mb as TMB,
+						ProbeSet.Symbol as TSYMBOL,
+						ProbeSet.name_num as TNAME_NUM
+						FROM  ProbeSetXRef, ProbeSetFreeze, ProbeSet
+					where 
+						( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+						alias,GenbankId,UniGeneId, Probe_Target_Description)
+						AGAINST ('%s' IN BOOLEAN MODE) )
+						and ProbeSet.symbol = '%s'
+						and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+						and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+						and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+				""" % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseC))
+
+				resC = cursor.fetchall()
+				if resC:
+					myFields = ['dummyC','probesetC','meanC','newlrsC','probesetchrC','probesetmbC','probesetsymbolC','probesetnamenumC']
+
+#					fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
+					#fpText.write("newdictgeneid  '%s'  \n" % newdict["GeneId"])
+					for j, item in enumerate(myFields):
+						temp = []
+						for k in resC:
+							#							fpText.write("j: result:  '%s'  \n" % k[j])
+							temp.append(k[j])
+						newdict[item] = temp 
+					#					fpText.close()
+
+
+					# put probesetcisC here
+					cursor.execute("""
+					select 
+						distinct 0,
+						if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000  and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000  ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
+						FROM  Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
+					where 
+						( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+						alias,GenbankId,UniGeneId, Probe_Target_Description)
+						AGAINST ('%s' IN BOOLEAN MODE) )
+						and ProbeSet.symbol = '%s'
+						and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+						and Geno.SpeciesId=1 #XZ: I add this line to speed up query
+						and ProbeSetXRef.Locus = Geno.name
+						and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+						and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+						""" % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseC))
+
+					resC2 = cursor.fetchall()
+					if resC2:
+						myFields = ['dummyC2','probesetcisC']
+						for j, item in enumerate(myFields):
+							temp = []
+							for k in resC2:
+								#							fpText.write("j: result:  '%s'  \n" % k[j])
+								temp.append(k[j])
+							newdict[item] = temp 
+					else:
+						newdict['probesetcisC'] = ''
+
+			else:
+				myFields = ['dummyC','probesetC','meanC','newlrsC','probesetchrC','probesetmbC','probesetsymbolC','probesetnamenumC', 'probesetcisC']
+				for j, item in enumerate(myFields):
+					newdict[item] = "--"
+
+
+			             
+			
+			
+
+
+			
+			#load gene from other Species by the same name
+			
+			
+			for item in otherSpecies:
+				othSpec, othSpecId = item
+				newdict2 = {}
+				
+				cursor.execute("SELECT %s from GeneList where SpeciesId = %d and geneSymbol= '%s' limit 1" % 
+							(fieldList, othSpecId, newdict["GeneSymbol"]))
+				resultsOther = cursor.fetchone()
+				if resultsOther:
+					for j, item in enumerate(fetchFields):
+						newdict2[item] = resultsOther[j]
+							
+					#count SNPs at the same-symbol mouse gene's own position (not the query gene's)
+					if diffCol and othSpec == 'mouse':
+						cursor.execute("""
+							select
+								count(*) from BXDSnpPosition
+							where
+								Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+								StrainId1 = %d AND StrainId2 = %d
+							""" % (newdict2["Chromosome"], newdict2["TxStart"], newdict2["TxEnd"], diffCol[0], diffCol[1]))
+
+
+						newdict2["snpCount"] = cursor.fetchone()[0]
+						newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0
+					else:
+						newdict2["snpDensity"] = newdict2["snpCount"] = 0
+						
+					try:
+						newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart'])
+					except TypeError:
+						pass
+						
+				newdict['%sGene' % othSpec] = newdict2
+
+			#newdict['RUDI']='hallo allemaal'
+				
+			GeneList.append(newdict)
+
+					
+	return GeneList
+
+
author	root	2012-05-08 18:39:56 -0500
committer	root	2012-05-08 18:39:56 -0500
commit	ea46f42ee640928b92947bfb204c41a482d80937 (patch)
tree	9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/qtlminer/GeneUtil.py
parent	056b5253fc3857b0444382aa39944f6344dc1ceb (diff)
download	genenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz