aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/qtlminer/GeneUtil.py
diff options
context:
space:
mode:
authorzsloan2015-03-27 20:28:51 +0000
committerzsloan2015-03-27 20:28:51 +0000
commitd0911a04958a04042da02a334ccc528dae79cc17 (patch)
tree3c48e2e937c1dbeaf00a5697c87ed251afa5c8f1 /web/webqtl/qtlminer/GeneUtil.py
parenta840ad18e1fe3db98a359a159e9b9b72367a2839 (diff)
downloadgenenetwork2-d0911a04958a04042da02a334ccc528dae79cc17.tar.gz
Removed everything from 'web' directory except genofiles and renamed the directory to 'genotype_files'
Diffstat (limited to 'web/webqtl/qtlminer/GeneUtil.py')
-rwxr-xr-xweb/webqtl/qtlminer/GeneUtil.py658
1 files changed, 0 insertions, 658 deletions
diff --git a/web/webqtl/qtlminer/GeneUtil.py b/web/webqtl/qtlminer/GeneUtil.py
deleted file mode 100755
index 3ae7f3c0..00000000
--- a/web/webqtl/qtlminer/GeneUtil.py
+++ /dev/null
@@ -1,658 +0,0 @@
-import string
-import os
-
-
-from base import webqtlConfig
-
-
-#Just return a list of dictionaries
-#each dictionary contains sub-dictionary
-def loadGenes(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse'):
- #cursor.execute("desc GeneList")
- #results = cursor.fetchall()
- #fetchFields = map(lambda X:X[0], results)
- fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd',
- 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
- 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
-
- ##List All Species in the Gene Table
- speciesDict = {}
- cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
- GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
- results = cursor.fetchall()
- for item in results:
- speciesDict[item[0]] = item[1]
-
- ##List current Species and other Species
- speciesId = speciesDict[species]
- otherSpecies = map(lambda X: [X, speciesDict[X]], speciesDict.keys())
- otherSpecies.remove([species, speciesId])
-
- cursor.execute("""SELECT %s from GeneList
- where
- SpeciesId = %d AND Chromosome = '%s' AND
- ((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f))
- order by txStart
- """
- % (string.join(fetchFields, ", "), speciesId, chrName, startMb, endMb, startMb, endMb))
- results = cursor.fetchall()
- GeneList = []
-
- if results:
- for result in results:
- newdict = {}
- for j, item in enumerate(fetchFields):
- newdict[item] = result[j]
- #count SNPs if possible
- if diffCol and species=='mouse':
- cursor.execute("""
- select
- count(*) from BXDSnpPosition
- where
- Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
- StrainId1 = %d AND StrainId2 = %d
- """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
- newdict["snpCount"] = cursor.fetchone()[0]
- newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0
- else:
- newdict["snpDensity"] = newdict["snpCount"] = 0
-
- try:
- newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart'])
- except:
- pass
-
- #load gene from other Species by the same name
- for item in otherSpecies:
- othSpec, othSpecId = item
- newdict2 = {}
-
- cursor.execute("SELECT %s from GeneList where SpeciesId = %d and geneSymbol= '%s' limit 1" %
- (string.join(fetchFields, ", "), othSpecId, newdict["GeneSymbol"]))
- resultsOther = cursor.fetchone()
- if resultsOther:
- for j, item in enumerate(fetchFields):
- newdict2[item] = resultsOther[j]
-
- #count SNPs if possible, could be a separate function
- if diffCol and othSpec == 'mouse':
- cursor.execute("""
- select
- count(*) from BXDSnpPosition
- where
- Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
- StrainId1 = %d AND StrainId2 = %d
- """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
-
-
-
- newdict2["snpCount"] = cursor.fetchone()[0]
- newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0
- else:
- newdict2["snpDensity"] = newdict2["snpCount"] = 0
-
- try:
- newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart'])
- except:
- pass
-
- newdict['%sGene' % othSpec] = newdict2
-
- GeneList.append(newdict)
-
- return GeneList
-
-
-
-
-
-
-def loadGenesForQTLminer(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse', databaseA='HC_M2_0606_P', databaseB='HC_M2CB_1205_R', databaseC='Illum_LXS_Hipp_loess0807', str1='C57BL/6J', str2='DBA/2J'):
- #cursor.execute("desc GeneList")
- #results = cursor.fetchall()
- #fetchFields = map(lambda X:X[0], results)
- fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd',
- 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
- 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
-
- ##List All Species in the Gene Table
- speciesDict = {}
- cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
- GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
- results = cursor.fetchall()
- for item in results:
- speciesDict[item[0]] = item[1]
-
-
-# fpText = open(os.path.join(webqtlConfig.TMPDIR, "strains") + str(j) + '.txt','wb')
-# fpText.write("strain: '%d' \n" % thisone )
-# fpText.close()
-# strainids.append(thisone)
-
-
-
-
- ##List current Species and other Species
- speciesId = speciesDict[species]
- otherSpecies = map(lambda X: [X, speciesDict[X]], speciesDict.keys())
- otherSpecies.remove([species, speciesId])
-
- cursor.execute("""SELECT %s from GeneList
- where
- SpeciesId = %d AND Chromosome = '%s' AND
- ((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f))
- order by txStart
- """
- % (string.join(fetchFields, ", "), speciesId, chrName, startMb, endMb, startMb, endMb))
- results = cursor.fetchall()
- GeneList = []
-
- if results:
- for result in results:
- newdict = {}
- for j, item in enumerate(fetchFields):
- newdict[item] = result[j]
-
-## get pathways
-
- cursor.execute("""
- select
- pathway
- FROM
- kegg.mmuflat
- where
- gene = '%s'
- """ % (newdict["GeneID"]) )
-
- resAAA = cursor.fetchall()
- if resAAA:
- myFields = ['pathways']
- for j, item in enumerate(myFields):
- temp = []
- for k in resAAA:
- temp.append(k[j])
- newdict["pathways"] = temp
-
- cursor.execute("""
- select
- name
- FROM
- kegg.mmuflat
- where
- gene = '%s'
- """ % (newdict["GeneID"]) )
-
- resAAA = cursor.fetchall()
- if resAAA:
- myFields = ['pathwaynames']
- for j, item in enumerate(myFields):
- temp = []
- for k in resAAA:
- temp.append(k[j])
- newdict["pathwaynames"] = temp
-
-## get GO terms
-
- cursor.execute("""
- SELECT
- distinct go.term.name
- FROM go.gene_product
- INNER JOIN go.dbxref ON (go.gene_product.dbxref_id=go.dbxref.id)
- INNER JOIN go.association ON (go.gene_product.id=go.association.gene_product_id)
- INNER JOIN go.term ON (go.association.term_id=go.term.id)
- WHERE
- go.dbxref.xref_key = (select mgi from go.genemgi where gene='%s' limit 1)
- AND
- go.dbxref.xref_dbname = 'MGI'
- AND
- go.term.term_type='biological_process'
- """ % (newdict["GeneID"]) )
-
- resAAA = cursor.fetchall()
- if resAAA:
- myFields = ['goterms']
- for j, item in enumerate(myFields):
- temp = []
- for k in resAAA:
- temp.append(k[j])
- newdict["goterms"] = temp
-
-
-
-
-
-
- newdict["snpDensity"] = newdict["snpCount"] = newdict["snpCountall"] = newdict["snpCountmis"] = newdict["snpCountBXD"] = newdict["snpCountmissel"] = 0
-
- #count SNPs if possible
- if diffCol and species=='mouse':
- cursor.execute("""
- select
- count(*) from BXDSnpPosition
- where
- Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
- StrainId1 = %d AND StrainId2 = %d
- """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
- newdict["snpCount"] = cursor.fetchone()[0]
- newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0
- else:
- newdict["snpDensity"] = newdict["snpCount"] = 0
-
- try:
- newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart'])
- except:
- pass
-
-
-
-#self.cursor.execute("SELECT geneSymbol, chromosome, txStart, txEnd from GeneList where SpeciesId= 1 and geneSymbol = %s", opt.geneName)
-
-
-
-
- ## search with gene name... doesnt matter. it changed to start and end position anyway
- ##self.cursor.execute("SELECT geneSymbol, chromosome, txStart, txEnd from GeneList where SpeciesId= 1 and geneSymbol = %s", newdict["GeneSymbol"])
-
-
- #count SNPs for all strains
- cursor.execute("""
- SELECT
- distinct SnpAll.Id
- from
- SnpAll
- where
- SpeciesId = '1' and SnpAll.Chromosome = '%s' AND
- SnpAll.Position >= %2.6f and SnpAll.Position < %2.6f AND
- SnpAll.Exon='Y'
- """ % (newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"]))
- snpfetch = cursor.fetchall()
- newdict["snpCountmis"] = len(snpfetch)
-
-## # count SNPs for selected strains
-
- sql = """SELECT
- distinct SnpAll.Id, `%s`, `%s`
- from
- SnpAll, SnpPattern
- where
- SpeciesId = '1' and SnpAll.Chromosome = '%s' AND
- SnpAll.Position >= %2.6f and SnpAll.Position < %2.6f and SnpAll.Id = SnpPattern.SnpId AND
- SnpPattern.`%s` != SnpPattern.`%s` AND
- SnpAll.Exon='Y'
- """ % (str1, str2, newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"], str1, str2)
- cursor.execute(sql)
- ressnp = cursor.fetchall()
- newdict["snpCountmissel"] = len(ressnp)
- newdict["hassnp"] = 'n'
- if len(ressnp)>0 :
- newdict["hassnp"]= 'y'
-## ####################################### NEW NEW NEW
-
-
-
-
-
-
-
- # count Indels for BXD mice
- cursor.execute("""
- SELECT
- distinct IndelAll.Name, IndelAll.Chromosome, IndelAll.SourceId, IndelAll.Mb_start,
- IndelAll.Mb_end, IndelAll.Strand, IndelAll.Type, IndelAll.Size, IndelAll.InDelSequence,
- SnpSource.Name
- from
- SnpSource, IndelAll
- where
- IndelAll.SpeciesId = '1' and IndelAll.Chromosome = '%s' AND
- IndelAll.Mb_start >= %2.6f and IndelAll.Mb_start < (%2.6f+.0010) AND
- SnpSource.Id = IndelAll.SourceId
- order by IndelAll.Mb_start
- """ % (newdict["Chromosome"], newdict["TxStart"], newdict["TxEnd"]))
-
- ressnp = cursor.fetchall()
- newdict["indelCountBXD"] = len(ressnp)
- newdict["hasindel"] = 'n'
- newdict["hasexpr"] = 'n'
- newdict["hascis"] = 'n'
- newdict["score"] = 0
- if len(ressnp)>0 :
- newdict["hasindel"]= 'y'
-
-## # cursor.execute("""
-## # select
-## # Name from ProbeSet
-## # where
-## # GeneId = '%s' AND ChipId=4 limit 1
-## # """ % (newdict["GeneID"]))
-## # if species=='mouse':
-## # cursor.execute("""
-## # select
-## # Name from ProbeSet
-## # where
-## # GeneId = '%s' AND ChipId=4
-## # """ % (newdict["GeneID"]))
-## # results = cursor.fetchall()
-## # psets = []
-## # for item in results:
-## # psets.append(item)
-## # newdict["probeset"] = psets
-## #
-## # else:
-## # newdict["probeset"] = "empty"
-
-
-
-
- if species=='mouse':
- cursor.execute("""
- select
- distinct 0,
- ProbeSet.Name as TNAME,
- round(ProbeSetXRef.Mean,1) as TMEAN,
- round(ProbeSetXRef.LRS,1) as TLRS,
- ProbeSet.Chr_num as TCHR_NUM,
- ProbeSet.Mb as TMB,
- ProbeSet.Symbol as TSYMBOL,
- ProbeSet.name_num as TNAME_NUM
- FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
- where
- ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
- alias,GenbankId,UniGeneId, Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE) )
- and ProbeSet.symbol = '%s'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
- """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseA))
- resA = cursor.fetchall()
-
- if resA:
- myFields = ['dummyA','probesetA','meanA','newlrsA','probesetchrA','probesetmbA','probesetsymbolA','probesetnamenumA']
-
-# fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
- #fpText.write("newdictgeneid '%s' \n" % newdict["GeneId"])
- for j, item in enumerate(myFields):
- temp = []
- for k in resA:
- # fpText.write("j: result: '%s' \n" % k[j])
- temp.append(k[j])
- newdict[item] = temp
- # fpText.close()
-
-
- # put probesetcisA here
-
- cursor.execute("""
- select
- distinct 0,
- if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
- FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
- where
- ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
- alias,GenbankId,UniGeneId, Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE) )
- and ProbeSet.symbol = '%s'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and Geno.SpeciesId=1 #XZ: I add this line to speed up query
- and ProbeSetXRef.Locus = Geno.name
- and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
- """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseA))
-
- resA2 = cursor.fetchall()
- if resA2:
- myFields = ['dummyA2','probesetcisA']
- for j, item in enumerate(myFields):
- temp = []
- for k in resA2:
- # fpText.write("j: result: '%s' \n" % k[j])
- temp.append(k[j])
- newdict[item] = temp
- else:
- newdict['probesetcisA'] = ''
-
-
-
- # specially for this dataset only
- newdict["hasexpr"] = 'n'
- if len(newdict["meanA"])>0:
- for mym in newdict["meanA"]:
- if mym>8:
- newdict["hasexpr"] = 'y'
-
- # specially for this dataset only
- newdict["hascis"] = 'n'
- if len(newdict["probesetcisA"])>0:
- for mym in newdict["probesetcisA"]:
- if mym != 'no':
- newdict["hascis"] = 'y'
-
- else:
- myFields = ['dummyA','probesetA,''meanA','newlrsA','probesetchrA','probesetmbA','probesetsymbolA','probesetnamenumA', 'probesetcisA']
- for j, item in enumerate(myFields):
- newdict[item] = "--"
-
- # specially for this dataset only
- newdict["hasexpr"] = 'n'
- newdict["hascis"] = 'n'
- newdict["score"] = 0
-
-########################## FOR B
-
- newdict["score"] = 0
- if newdict["hassnp"] == 'y':
- newdict["score"] = newdict["score"] + 1
- if newdict["hasexpr"] == 'y':
- newdict["score"] = newdict["score"] + 1
- if newdict["hasindel"] == 'y':
- newdict["score"] = newdict["score"] + 1
- if newdict["hascis"] == 'y':
- newdict["score"] = newdict["score"] + 1
-
-
-
- if species=='mouse':
- cursor.execute("""
- select
- distinct 0,
- ProbeSet.Name as TNAME,
- round(ProbeSetXRef.Mean,1) as TMEAN,
- round(ProbeSetXRef.LRS,1) as TLRS,
- ProbeSet.Chr_num as TCHR_NUM,
- ProbeSet.Mb as TMB,
- ProbeSet.Symbol as TSYMBOL,
- ProbeSet.name_num as TNAME_NUM
- FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
- where
- ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
- alias,GenbankId,UniGeneId, Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE) )
- and ProbeSet.symbol = '%s'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
- """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseB))
-
- resB = cursor.fetchall()
- if resB:
- myFields = ['dummyB','probesetB','meanB','newlrsB','probesetchrB','probesetmbB','probesetsymbolB','probesetnamenumB']
-
-# fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
- #fpText.write("newdictgeneid '%s' \n" % newdict["GeneId"])
- for j, item in enumerate(myFields):
- temp = []
- for k in resB:
- # fpText.write("j: result: '%s' \n" % k[j])
- temp.append(k[j])
- newdict[item] = temp
- # fpText.close()
-
-
- # put probesetcisB here
- cursor.execute("""
- select
- distinct 0,
- if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
- FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
- where
- ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
- alias,GenbankId,UniGeneId, Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE) )
- and ProbeSet.symbol = '%s'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and Geno.SpeciesId=1 #XZ: I add this line to speed up query
- and ProbeSetXRef.Locus = Geno.name
- and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
- """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseB))
-
- resB2 = cursor.fetchall()
- if resB2:
- myFields = ['dummyB2','probesetcisB']
- for j, item in enumerate(myFields):
- temp = []
- for k in resB2:
- # fpText.write("j: result: '%s' \n" % k[j])
- temp.append(k[j])
- newdict[item] = temp
- else:
- newdict['probesetcisB'] = ''
-
-
- else:
- myFields = ['dummyB','probesetB,''meanB','newlrsB','probesetchrB','probesetmbB','probesetsymbolB','probesetnamenumB', 'probesetcisB']
- for j, item in enumerate(myFields):
- newdict[item] = "--"
-
-
-
-##########################
-
-
-########################## FOR C
-
-
- if species=='mouse':
- cursor.execute("""
- select
- distinct 0,
- ProbeSet.Name as TNAME,
- round(ProbeSetXRef.Mean,1) as TMEAN,
- round(ProbeSetXRef.LRS,1) as TLRS,
- ProbeSet.Chr_num as TCHR_NUM,
- ProbeSet.Mb as TMB,
- ProbeSet.Symbol as TSYMBOL,
- ProbeSet.name_num as TNAME_NUM
- FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
- where
- ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
- alias,GenbankId,UniGeneId, Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE) )
- and ProbeSet.symbol = '%s'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
- """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseC))
-
- resC = cursor.fetchall()
- if resC:
- myFields = ['dummyC','probesetC','meanC','newlrsC','probesetchrC','probesetmbC','probesetsymbolC','probesetnamenumC']
-
-# fpText = open(os.path.join(webqtlConfig.TMPDIR, "res") + '.txt','wb')
- #fpText.write("newdictgeneid '%s' \n" % newdict["GeneId"])
- for j, item in enumerate(myFields):
- temp = []
- for k in resC:
- # fpText.write("j: result: '%s' \n" % k[j])
- temp.append(k[j])
- newdict[item] = temp
- # fpText.close()
-
-
- # put probesetcisC here
- cursor.execute("""
- select
- distinct 0,
- if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
- FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
- where
- ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
- alias,GenbankId,UniGeneId, Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE) )
- and ProbeSet.symbol = '%s'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and Geno.SpeciesId=1 #XZ: I add this line to speed up query
- and ProbeSetXRef.Locus = Geno.name
- and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name='%s' limit 1)
- """ % (newdict["GeneSymbol"],newdict["GeneSymbol"],databaseC))
-
- resC2 = cursor.fetchall()
- if resC2:
- myFields = ['dummyC2','probesetcisC']
- for j, item in enumerate(myFields):
- temp = []
- for k in resC2:
- # fpText.write("j: result: '%s' \n" % k[j])
- temp.append(k[j])
- newdict[item] = temp
- else:
- newdict['probesetcisC'] = ''
-
- else:
- myFields = ['dummyC','probesetC,''meanC','newlrsC','probesetchrC','probesetmbC','probesetsymbolC','probesetnamenumC', 'probesetcisC']
- for j, item in enumerate(myFields):
- newdict[item] = "--"
-
-
-
-
-
-
-
-
- #load gene from other Species by the same name
-
-
- for item in otherSpecies:
- othSpec, othSpecId = item
- newdict2 = {}
-
- cursor.execute("SELECT %s from GeneList where SpeciesId = %d and geneSymbol= '%s' limit 1" %
- (string.join(fetchFields, ", "), othSpecId, newdict["GeneSymbol"]))
- resultsOther = cursor.fetchone()
- if resultsOther:
- for j, item in enumerate(fetchFields):
- newdict2[item] = resultsOther[j]
-
- #count SNPs if possible, could be a separate function
- if diffCol and othSpec == 'mouse':
- cursor.execute("""
- select
- count(*) from BXDSnpPosition
- where
- Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
- StrainId1 = %d AND StrainId2 = %d
- """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1]))
-
-
- newdict2["snpCount"] = cursor.fetchone()[0]
- newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0
- else:
- newdict2["snpDensity"] = newdict2["snpCount"] = 0
-
- try:
- newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart'])
- except:
- pass
-
- newdict['%sGene' % othSpec] = newdict2
-
- #newdict['RUDI']='hallo allemaal'
-
- GeneList.append(newdict)
-
-
- return GeneList
-
-