aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/qtlminer/GeneUtil.py
diff options
context:
space:
mode:
Diffstat (limited to 'web/webqtl/qtlminer/GeneUtil.py')
-rwxr-xr-xweb/webqtl/qtlminer/GeneUtil.py658
1 files changed, 658 insertions, 0 deletions
diff --git a/web/webqtl/qtlminer/GeneUtil.py b/web/webqtl/qtlminer/GeneUtil.py
new file mode 100755
index 00000000..3ae7f3c0
--- /dev/null
+++ b/web/webqtl/qtlminer/GeneUtil.py
@@ -0,0 +1,658 @@
+import string
+import os
+
+
+from base import webqtlConfig
+
+
+#Just return a list of dictionaries
+#each dictionary contains sub-dictionary
+def loadGenes(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse'):
+ #cursor.execute("desc GeneList")
+ #results = cursor.fetchall()
+ #fetchFields = map(lambda X:X[0], results)
+ fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd',
+ 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
+ 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
+
+ ##List All Species in the Gene Table
+ speciesDict = {}
+ cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
+ GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
+ results = cursor.fetchall()
+ for item in results:
+ speciesDict[item[0]] = item[1]
+
+ ##List current Species and other Species
+ speciesId = speciesDict[species]
+ otherSpecies = map(lambda X: [X, speciesDict[X]], speciesDict.keys())
+ otherSpecies.remove([species, speciesId])
+
+ cursor.execute("""SELECT %s from GeneList
+ where
+ SpeciesId = %d AND Chromosome = '%s' AND
+ ((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f))
+ order by txStart
+ """
+ % (string.join(fetchFields, ", "), speciesId, chrName, startMb, endMb, startMb, endMb))
+ results = cursor.fetchall()
+ GeneList = []
+
+ if results:
+ for result in results:
+ newdict = {}
+ for j, item in enumerate(fetchFields):
+ newdict[item] = result[j]
+ #count SNPs if possible
+ if diffCol and species=='mouse':
+ cursor.execute("""
+ select
+ count(*) from BXDSnpPosition
+ where
+ Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+ StrainId1 = %d AND StrainId2 = %d
+ """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
+ newdict["snpCount"] = cursor.fetchone()[0]
+ newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0
+ else:
+ newdict["snpDensity"] = newdict["snpCount"] = 0
+
+ try:
+ newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart'])
+ except:
+ pass
+
+ #load gene from other Species by the same name
+ for item in otherSpecies:
+ othSpec, othSpecId = item
+ newdict2 = {}
+
+ cursor.execute("SELECT %s from GeneList where SpeciesId = %d and geneSymbol= '%s' limit 1" %
+ (string.join(fetchFields, ", "), othSpecId, newdict["GeneSymbol"]))
+ resultsOther = cursor.fetchone()
+ if resultsOther:
+ for j, item in enumerate(fetchFields):
+ newdict2[item] = resultsOther[j]
+
+ #count SNPs if possible, could be a separate function
+ if diffCol and othSpec == 'mouse':
+ cursor.execute("""
+ select
+ count(*) from BXDSnpPosition
+ where
+ Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+ StrainId1 = %d AND StrainId2 = %d
+ """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
+
+
+
+ newdict2["snpCount"] = cursor.fetchone()[0]
+ newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0
+ else:
+ newdict2["snpDensity"] = newdict2["snpCount"] = 0
+
+ try:
+ newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart'])
+ except:
+ pass
+
+ newdict['%sGene' % othSpec] = newdict2
+
+ GeneList.append(newdict)
+
+ return GeneList
+
+
+
+
+
+
+def loadGenesForQTLminer(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse', databaseA='HC_M2_0606_P', databaseB='HC_M2CB_1205_R', databaseC='Illum_LXS_Hipp_loess0807', str1='C57BL/6J', str2='DBA/2J'):
+ #cursor.execute("desc GeneList")
+ #results = cursor.fetchall()
+ #fetchFields = map(lambda X:X[0], results)
+ fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd',
+ 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
+ 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
+
+ ##List All Species in the Gene Table
+ speciesDict = {}
+ cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
+ GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
+ results = cursor.fetchall()
+ for item in results:
+ speciesDict[item[0]] = item[1]
+
+
+# fpText = open(os.path.join(webqtlConfig.TMPDIR, "strains") + str(j) + '.txt','wb')
+# fpText.write("strain: '%d' \n" % thisone )
+# fpText.close()
+# strainids.append(thisone)
+
+
+
+
+ ##List current Species and other Species
+ speciesId = speciesDict[species]
+ otherSpecies = map(lambda X: [X, speciesDict[X]], speciesDict.keys())
+ otherSpecies.remove([species, speciesId])
+
+ cursor.execute("""SELECT %s from GeneList
+ where
+ SpeciesId = %d AND Chromosome = '%s' AND
+ ((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f))
+ order by txStart
+ """
+ % (string.join(fetchFields, ", "), speciesId, chrName, startMb, endMb, startMb, endMb))
+ results = cursor.fetchall()
+ GeneList = []
+
+ if results:
+ for result in results:
+ newdict = {}
+ for j, item in enumerate(fetchFields):
+ newdict[item] = result[j]
+
+## get pathways
+
+ cursor.execute("""
+ select
+ pathway
+ FROM
+ kegg.mmuflat
+ where
+ gene = '%s'
+ """ % (newdict["GeneID"]) )
+
+ resAAA = cursor.fetchall()
+ if resAAA:
+ myFields = ['pathways']
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resAAA:
+ temp.append(k[j])
+ newdict["pathways"] = temp
+
+ cursor.execute("""
+ select
+ name
+ FROM
+ kegg.mmuflat
+ where
+ gene = '%s'
+ """ % (newdict["GeneID"]) )
+
+ resAAA = cursor.fetchall()
+ if resAAA:
+ myFields = ['pathwaynames']
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resAAA:
+ temp.append(k[j])
+ newdict["pathwaynames"] = temp
+
+## get GO terms
+
+ cursor.execute("""
+ SELECT
+ distinct go.term.name
+ FROM go.gene_product
+ INNER JOIN go.dbxref ON (go.gene_product.dbxref_id=go.dbxref.id)
+ INNER JOIN go.association ON (go.gene_product.id=go.association.gene_product_id)
+ INNER JOIN go.term ON (go.association.term_id=go.term.id)
+ WHERE
+ go.dbxref.xref_key = (select mgi from go.genemgi where gene='%s' limit 1)
+ AND
+ go.dbxref.xref_dbname = 'MGI'
+ AND
+ go.term.term_type='biological_process'
+ """ % (newdict["GeneID"]) )
+
+ resAAA = cursor.fetchall()
+ if resAAA:
+ myFields = ['goterms']
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resAAA:
+ temp.append(k[j])
+ newdict["goterms"] = temp
+
+
+
+
+
+
+ newdict["snpDensity"] = newdict["snpCount"] = newdict["snpCountall"] = newdict["snpCountmis"] = newdict["snpCountBXD"] = newdict["snpCountmissel"] = 0
+
+ #count SNPs if possible
+ if diffCol and species=='mouse':
+ cursor.execute("""
+ select
+ count(*) from BXDSnpPosition
+ where
+ Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+ StrainId1 = %d AND StrainId2 = %d
+ """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
+ newdict["snpCount"] = cursor.fetchone()[0]
+ newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0
+ else:
+ newdict["snpDensity"] = newdict["snpCount"] = 0
+
+ try:
+ newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart'])
+ except:
+ pass
+
+
+
+#self.cursor.execute("SELECT geneSymbol, chromosome, txStart, txEnd from GeneList where SpeciesId= 1 and geneSymbol = %s", opt.geneName)
+
+
+
+
+ ## search with gene name... doesnt matter. it changed to start and end position anyway
+ ##self.cursor.execute("SELECT geneSymbol, chromosome, txStart, txEnd from GeneList where SpeciesId= 1 and geneSymbol = %s", newdict["GeneSymbol"])
+
+
+ #count SNPs for all strains
+ cursor.execute("""
+ SELECT
+ distinct SnpAll.Id
+ from
+ SnpAll
+ where
+ SpeciesId = '1' and SnpAll.Chromosome = '%s' AND
+ SnpAll.Position >= %2.6f and SnpAll.Position < %2.6f AND
+ SnpAll.Exon='Y'
+ """ % (newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"]))
+ snpfetch = cursor.fetchall()
+ newdict["snpCountmis"] = len(snpfetch)
+
+## # count SNPs for selected strains
+
+ sql = """SELECT
+ distinct SnpAll.Id, `%s`, `%s`
+ from
+ SnpAll, SnpPattern
+ where
+ SpeciesId = '1' and SnpAll.Chromosome = '%s' AND
+ SnpAll.Position >= %2.6f and SnpAll.Position < %2.6f and SnpAll.Id = SnpPattern.SnpId AND
+ SnpPattern.`%s` != SnpPattern.`%s` AND
+ SnpAll.Exon='Y'
+ """ % (str1, str2, newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"], str1, str2)
+ cursor.execute(sql)
+ ressnp = cursor.fetchall()
+ newdict["snpCountmissel"] = len(ressnp)
+ newdict["hassnp"] = 'n'
+ if len(ressnp)>0 :
+ newdict["hassnp"]= 'y'
+## ####################################### NEW NEW NEW
+
+
+
+
+
+
+
+ # count Indels for BXD mice
+ cursor.execute("""
+ SELECT
+ distinct IndelAll.Name, IndelAll.Chromosome, IndelAll.SourceId, IndelAll.Mb_start,
+ IndelAll.Mb_end, IndelAll.Strand, IndelAll.Type, IndelAll.Size, IndelAll.InDelSequence,
+ SnpSource.Name
+ from
+ SnpSource, IndelAll
+ where
+ IndelAll.SpeciesId = '1' and IndelAll.Chromosome = '%s' AND
+ IndelAll.Mb_start >= %2.6f and IndelAll.Mb_start < (%2.6f+.0010) AND
+ SnpSource.Id = IndelAll.SourceId
+ order by IndelAll.Mb_start
+ """ % (newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"]))
+
+ ressnp = cursor.fetchall()
+ newdict["indelCountBXD"] = len(ressnp)
+ newdict["hasindel"] = 'n'
+ newdict["hasexpr"] = 'n'
+ newdict["hascis"] = 'n'
+ newdict["score"] = 0
+ if len(ressnp)>0 :
+ newdict["hasindel"]= 'y'
+
+## # cursor.execute("""
+## # select
+## # Name from ProbeSet
+## # where
+## # GeneId = '%s' AND ChipId=4 limit 1
+## # """ % (newdict["GeneID"]))
+## # if species=='mouse':
+## # cursor.execute("""
+## # select
+## # Name from ProbeSet
+## # where
+## # GeneId = '%s' AND ChipId=4
+## # """ % (newdict["GeneID"]))
+## # results = cursor.fetchall()
+## # psets = []
+## # for item in results:
+## # psets.append(item)
+## # newdict["probeset"] = psets
+## #
+## # else:
+## # newdict["probeset"] = "empty"
+
+
+
+
+ if species=='mouse':
+ cursor.execute("""
+ select
+ distinct 0,
+ ProbeSet.Name as TNAME,
+ round(ProbeSetXRef.Mean,1) as TMEAN,
+ round(ProbeSetXRef.LRS,1) as TLRS,
+ ProbeSet.Chr_num as TCHR_NUM,
+ ProbeSet.Mb as TMB,
+ ProbeSet.Symbol as TSYMBOL,
+ ProbeSet.name_num as TNAME_NUM
+ FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
+ where
+ ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+ alias,GenbankId,UniGeneId, Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE) )
+ and ProbeSet.symbol = '%s'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+ """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseA))
+ resA = cursor.fetchall()
+
+ if resA:
+ myFields = ['dummyA','probesetA','meanA','newlrsA','probesetchrA','probesetmbA','probesetsymbolA','probesetnamenumA']
+
+# fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
+ #fpText.write("newdictgeneid '%s' \n" % newdict["GeneId"])
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resA:
+ # fpText.write("j: result: '%s' \n" % k[j])
+ temp.append(k[j])
+ newdict[item] = temp
+ # fpText.close()
+
+
+ # put probesetcisA here
+
+ cursor.execute("""
+ select
+ distinct 0,
+ if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
+ FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
+ where
+ ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+ alias,GenbankId,UniGeneId, Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE) )
+ and ProbeSet.symbol = '%s'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and Geno.SpeciesId=1 #XZ: I add this line to speed up query
+ and ProbeSetXRef.Locus = Geno.name
+ and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+ """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseA))
+
+ resA2 = cursor.fetchall()
+ if resA2:
+ myFields = ['dummyA2','probesetcisA']
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resA2:
+ # fpText.write("j: result: '%s' \n" % k[j])
+ temp.append(k[j])
+ newdict[item] = temp
+ else:
+ newdict['probesetcisA'] = ''
+
+
+
+ # specially for this dataset only
+ newdict["hasexpr"] = 'n'
+ if len(newdict["meanA"])>0:
+ for mym in newdict["meanA"]:
+ if mym>8:
+ newdict["hasexpr"] = 'y'
+
+ # specially for this dataset only
+ newdict["hascis"] = 'n'
+ if len(newdict["probesetcisA"])>0:
+ for mym in newdict["probesetcisA"]:
+ if mym != 'no':
+ newdict["hascis"] = 'y'
+
+ else:
+ myFields = ['dummyA','probesetA,''meanA','newlrsA','probesetchrA','probesetmbA','probesetsymbolA','probesetnamenumA', 'probesetcisA']
+ for j, item in enumerate(myFields):
+ newdict[item] = "--"
+
+ # specially for this dataset only
+ newdict["hasexpr"] = 'n'
+ newdict["hascis"] = 'n'
+ newdict["score"] = 0
+
+########################## FOR B
+
+ newdict["score"] = 0
+ if newdict["hassnp"] == 'y':
+ newdict["score"] = newdict["score"] + 1
+ if newdict["hasexpr"] == 'y':
+ newdict["score"] = newdict["score"] + 1
+ if newdict["hasindel"] == 'y':
+ newdict["score"] = newdict["score"] + 1
+ if newdict["hascis"] == 'y':
+ newdict["score"] = newdict["score"] + 1
+
+
+
+ if species=='mouse':
+ cursor.execute("""
+ select
+ distinct 0,
+ ProbeSet.Name as TNAME,
+ round(ProbeSetXRef.Mean,1) as TMEAN,
+ round(ProbeSetXRef.LRS,1) as TLRS,
+ ProbeSet.Chr_num as TCHR_NUM,
+ ProbeSet.Mb as TMB,
+ ProbeSet.Symbol as TSYMBOL,
+ ProbeSet.name_num as TNAME_NUM
+ FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
+ where
+ ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+ alias,GenbankId,UniGeneId, Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE) )
+ and ProbeSet.symbol = '%s'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+ """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseB))
+
+ resB = cursor.fetchall()
+ if resB:
+ myFields = ['dummyB','probesetB','meanB','newlrsB','probesetchrB','probesetmbB','probesetsymbolB','probesetnamenumB']
+
+# fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
+ #fpText.write("newdictgeneid '%s' \n" % newdict["GeneId"])
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resB:
+ # fpText.write("j: result: '%s' \n" % k[j])
+ temp.append(k[j])
+ newdict[item] = temp
+ # fpText.close()
+
+
+ # put probesetcisB here
+ cursor.execute("""
+ select
+ distinct 0,
+ if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
+ FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
+ where
+ ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+ alias,GenbankId,UniGeneId, Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE) )
+ and ProbeSet.symbol = '%s'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and Geno.SpeciesId=1 #XZ: I add this line to speed up query
+ and ProbeSetXRef.Locus = Geno.name
+ and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+ """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseB))
+
+ resB2 = cursor.fetchall()
+ if resB2:
+ myFields = ['dummyB2','probesetcisB']
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resB2:
+ # fpText.write("j: result: '%s' \n" % k[j])
+ temp.append(k[j])
+ newdict[item] = temp
+ else:
+ newdict['probesetcisB'] = ''
+
+
+ else:
+ myFields = ['dummyB','probesetB,''meanB','newlrsB','probesetchrB','probesetmbB','probesetsymbolB','probesetnamenumB', 'probesetcisB']
+ for j, item in enumerate(myFields):
+ newdict[item] = "--"
+
+
+
+##########################
+
+
+########################## FOR C
+
+
+ if species=='mouse':
+ cursor.execute("""
+ select
+ distinct 0,
+ ProbeSet.Name as TNAME,
+ round(ProbeSetXRef.Mean,1) as TMEAN,
+ round(ProbeSetXRef.LRS,1) as TLRS,
+ ProbeSet.Chr_num as TCHR_NUM,
+ ProbeSet.Mb as TMB,
+ ProbeSet.Symbol as TSYMBOL,
+ ProbeSet.name_num as TNAME_NUM
+ FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
+ where
+ ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+ alias,GenbankId,UniGeneId, Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE) )
+ and ProbeSet.symbol = '%s'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+ """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseC))
+
+ resC = cursor.fetchall()
+ if resC:
+ myFields = ['dummyC','probesetC','meanC','newlrsC','probesetchrC','probesetmbC','probesetsymbolC','probesetnamenumC']
+
+# fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
+ #fpText.write("newdictgeneid '%s' \n" % newdict["GeneId"])
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resC:
+ # fpText.write("j: result: '%s' \n" % k[j])
+ temp.append(k[j])
+ newdict[item] = temp
+ # fpText.close()
+
+
+ # put probesetcisC here
+ cursor.execute("""
+ select
+ distinct 0,
+ if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
+ FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
+ where
+ ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
+ alias,GenbankId,UniGeneId, Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE) )
+ and ProbeSet.symbol = '%s'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and Geno.SpeciesId=1 #XZ: I add this line to speed up query
+ and ProbeSetXRef.Locus = Geno.name
+ and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
+ """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseC))
+
+ resC2 = cursor.fetchall()
+ if resC2:
+ myFields = ['dummyC2','probesetcisC']
+ for j, item in enumerate(myFields):
+ temp = []
+ for k in resC2:
+ # fpText.write("j: result: '%s' \n" % k[j])
+ temp.append(k[j])
+ newdict[item] = temp
+ else:
+ newdict['probesetcisC'] = ''
+
+ else:
+ myFields = ['dummyC','probesetC,''meanC','newlrsC','probesetchrC','probesetmbC','probesetsymbolC','probesetnamenumC', 'probesetcisC']
+ for j, item in enumerate(myFields):
+ newdict[item] = "--"
+
+
+
+
+
+
+
+
+ #load gene from other Species by the same name
+
+
+ for item in otherSpecies:
+ othSpec, othSpecId = item
+ newdict2 = {}
+
+ cursor.execute("SELECT %s from GeneList where SpeciesId = %d and geneSymbol= '%s' limit 1" %
+ (string.join(fetchFields, ", "), othSpecId, newdict["GeneSymbol"]))
+ resultsOther = cursor.fetchone()
+ if resultsOther:
+ for j, item in enumerate(fetchFields):
+ newdict2[item] = resultsOther[j]
+
+ #count SNPs if possible, could be a separate function
+ if diffCol and othSpec == 'mouse':
+ cursor.execute("""
+ select
+ count(*) from BXDSnpPosition
+ where
+ Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+ StrainId1 = %d AND StrainId2 = %d
+ """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
+
+
+ newdict2["snpCount"] = cursor.fetchone()[0]
+ newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0
+ else:
+ newdict2["snpDensity"] = newdict2["snpCount"] = 0
+
+ try:
+ newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart'])
+ except:
+ pass
+
+ newdict['%sGene' % othSpec] = newdict2
+
+ #newdict['RUDI']='hallo allemaal'
+
+ GeneList.append(newdict)
+
+
+ return GeneList
+
+