diff options
author | root | 2012-05-08 18:39:56 -0500 |
---|---|---|
committer | root | 2012-05-08 18:39:56 -0500 |
commit | ea46f42ee640928b92947bfb204c41a482d80937 (patch) | |
tree | 9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/qtlminer/GeneUtil.py | |
parent | 056b5253fc3857b0444382aa39944f6344dc1ceb (diff) | |
download | genenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz |
Add all the source codes into the github.
Diffstat (limited to 'web/webqtl/qtlminer/GeneUtil.py')
-rwxr-xr-x | web/webqtl/qtlminer/GeneUtil.py | 658 |
1 files changed, 658 insertions, 0 deletions
# Origin: web/webqtl/qtlminer/GeneUtil.py (new file, mode 100755, blob 3ae7f3c0).
"""Gene lookup helpers for QTLminer.

Both public functions return a list of dictionaries, one per gene found in a
chromosome interval.  Each dictionary holds the GeneList columns plus derived
values, and one sub-dictionary per other species (key ``'<species>Gene'``).

All values are passed to ``cursor.execute`` as bound parameters using the
``%s`` placeholder style, i.e. the cursor is expected to behave like the
MySQL DB-API cursor this code base already uses for parameterised queries.
"""
import string
import os

try:
    from base import webqtlConfig
except ImportError:
    # No live code in this module reads webqtlConfig (only old, commented-out
    # debugging code did); keep the name so existing ``from GeneUtil import``
    # users still find it, but do not make the import a hard requirement.
    webqtlConfig = None


# Columns read from GeneList, in the order they are selected.
_GENE_FIELDS = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd',
                'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
                'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']

# Column stems for one expression data set; a suffix ('A', 'B' or 'C') is
# appended to build the result keys.  Order matches _PROBESET_SQL's columns.
_PROBESET_FIELDS = ('dummy', 'probeset', 'mean', 'newlrs', 'probesetchr',
                    'probesetmb', 'probesetsymbol', 'probesetnamenum')

_PROBESET_SQL = """
    select
        distinct 0,
        ProbeSet.Name as TNAME,
        round(ProbeSetXRef.Mean,1) as TMEAN,
        round(ProbeSetXRef.LRS,1) as TLRS,
        ProbeSet.Chr_num as TCHR_NUM,
        ProbeSet.Mb as TMB,
        ProbeSet.Symbol as TSYMBOL,
        ProbeSet.name_num as TNAME_NUM
    FROM ProbeSetXRef, ProbeSetFreeze, ProbeSet
    where
        ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
                 alias,GenbankId,UniGeneId, Probe_Target_Description)
          AGAINST (%s IN BOOLEAN MODE) )
        and ProbeSet.symbol = %s
        and ProbeSet.Id = ProbeSetXRef.ProbeSetId
        and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
        and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name=%s limit 1)
    """

# XZ: the Geno.SpeciesId=1 condition was added to speed up the query.
_PROBESET_CIS_SQL = """
    select
        distinct 0,
        if( (ProbeSet.Chr = Geno.Chr AND ProbeSetXRef.LRS > 10.0000000 and ABS(ProbeSet.Mb-Geno.Mb) < 10.0000000 ) , concat('yes(',round(ProbeSetXRef.LRS,1),')') , 'no') as cis
    FROM Geno, ProbeSetXRef, ProbeSetFreeze, ProbeSet
    where
        ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,
                 alias,GenbankId,UniGeneId, Probe_Target_Description)
          AGAINST (%s IN BOOLEAN MODE) )
        and ProbeSet.symbol = %s
        and ProbeSet.Id = ProbeSetXRef.ProbeSetId
        and Geno.SpeciesId=1
        and ProbeSetXRef.Locus = Geno.name
        and ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
        and ProbeSetFreeze.Id = (select Id from ProbeSetFreeze where Name=%s limit 1)
    """


def _text(value):
    """Return *value* as text, for columns the original compared as quoted strings."""
    return "%s" % (value,)


def _quote_column(name):
    """Return *name* as a backtick-quoted SQL identifier.

    Column names cannot be bound as parameters, so embedded backticks are
    doubled, and ``%`` is doubled because the statement is later combined
    with ``%s`` parameters by the driver.
    """
    return "`%s`" % _text(name).replace("`", "``").replace("%", "%%")


def _load_species_ids(cursor):
    """Return {species name: species id} for every species present in GeneList."""
    cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where "
                   "GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
    return dict((row[0], row[1]) for row in cursor.fetchall())


def _other_species(speciesDict, species):
    """Return [name, id] pairs for every species except *species*."""
    return [[name, specId] for name, specId in speciesDict.items() if name != species]


def _fetch_region_genes(cursor, speciesId, chrName, startMb, endMb):
    """Return one dict per gene whose TxStart or TxEnd lies in (startMb, endMb]."""
    sql = ("SELECT " + ", ".join(_GENE_FIELDS) + " from GeneList"
           " where SpeciesId = %s AND Chromosome = %s AND"
           " ((TxStart > %s and TxStart <= %s) OR (TxEnd > %s and TxEnd <= %s))"
           " order by txStart")
    cursor.execute(sql, (speciesId, _text(chrName), startMb, endMb, startMb, endMb))
    return [dict(zip(_GENE_FIELDS, row)) for row in cursor.fetchall()]


def _add_snp_stats(cursor, gene, chrom, diffCol, isMouse):
    """Set gene['snpCount'] and gene['snpDensity'] from BXDSnpPosition.

    Both stay 0 unless *diffCol* (a pair of strain ids) is given, the gene is
    a mouse gene and it has usable TxStart/TxEnd values.  The density is
    count / (TxEnd - TxStart) / 1000.0 and is 0 for a zero-length gene.
    """
    gene["snpDensity"] = gene["snpCount"] = 0
    start, end = gene.get("TxStart"), gene.get("TxEnd")
    if not (diffCol and isMouse) or start is None or end is None:
        return
    cursor.execute("""
        select
            count(*) from BXDSnpPosition
        where
            Chr = %s AND Mb >= %s AND Mb < %s AND
            StrainId1 = %s AND StrainId2 = %s
        """, (_text(chrom), start, end, diffCol[0], diffCol[1]))
    gene["snpCount"] = cursor.fetchone()[0]
    span = end - start
    if span:
        gene["snpDensity"] = gene["snpCount"] / float(span) / 1000.0


def _add_gene_length(gene):
    """Set gene['GeneLength'] = 1000.0 * (TxEnd - TxStart) when both are available."""
    try:
        gene['GeneLength'] = 1000.0 * (gene['TxEnd'] - gene['TxStart'])
    except (KeyError, TypeError):
        # Homologue not found (no keys) or coordinates are NULL: leave it unset.
        pass


def _load_other_species_genes(cursor, gene, otherSpecies, diffCol):
    """Attach the same-symbol gene of every other species as gene['<species>Gene'].

    A species without a matching gene still gets a sub-dictionary, holding
    only zeroed SNP values.
    """
    sql = ("SELECT " + ", ".join(_GENE_FIELDS) +
           " from GeneList where SpeciesId = %s and geneSymbol= %s limit 1")
    for othSpec, othSpecId in otherSpecies:
        cursor.execute(sql, (othSpecId, _text(gene["GeneSymbol"])))
        row = cursor.fetchone()
        homolog = dict(zip(_GENE_FIELDS, row)) if row else {}
        # SNPs are counted over the homologue's own chromosome and
        # coordinates, so the count matches the length used for the density.
        _add_snp_stats(cursor, homolog, homolog.get("Chromosome"), diffCol,
                       bool(row) and othSpec == 'mouse')
        _add_gene_length(homolog)
        gene['%sGene' % othSpec] = homolog


def _add_annotations(cursor, gene):
    """Add 'pathways', 'pathwaynames' and 'goterms' lists; a key is set only when rows exist."""
    lookups = (
        ("pathways", "select pathway FROM kegg.mmuflat where gene = %s"),
        ("pathwaynames", "select name FROM kegg.mmuflat where gene = %s"),
        ("goterms", """
            SELECT
                distinct go.term.name
            FROM go.gene_product
                INNER JOIN go.dbxref ON (go.gene_product.dbxref_id=go.dbxref.id)
                INNER JOIN go.association ON (go.gene_product.id=go.association.gene_product_id)
                INNER JOIN go.term ON (go.association.term_id=go.term.id)
            WHERE
                go.dbxref.xref_key = (select mgi from go.genemgi where gene=%s limit 1)
                AND
                go.dbxref.xref_dbname = 'MGI'
                AND
                go.term.term_type='biological_process'
            """),
    )
    geneId = _text(gene["GeneID"])
    for key, sql in lookups:
        cursor.execute(sql, (geneId,))
        values = [row[0] for row in cursor.fetchall()]
        if values:
            gene[key] = values


def _add_variant_counts(cursor, gene, str1, str2):
    """Count exonic SNPs (all strains and str1-vs-str2) and indels inside the gene.

    Sets snpCountmis, snpCountmissel, hassnp, indelCountBXD and hasindel.
    """
    region = (_text(gene["Chromosome"]), gene["TxStart"], gene["TxEnd"])

    # exonic SNPs, any strain
    cursor.execute("""
        SELECT
            distinct SnpAll.Id
        from
            SnpAll
        where
            SpeciesId = '1' and SnpAll.Chromosome = %s AND
            SnpAll.Position >= %s and SnpAll.Position < %s AND
            SnpAll.Exon='Y'
        """, region)
    gene["snpCountmis"] = len(cursor.fetchall())

    # exonic SNPs that differ between the two selected strains
    col1, col2 = _quote_column(str1), _quote_column(str2)
    sql = ("SELECT distinct SnpAll.Id, " + col1 + ", " + col2 +
           " from SnpAll, SnpPattern"
           " where SpeciesId = '1' and SnpAll.Chromosome = %s AND"
           " SnpAll.Position >= %s and SnpAll.Position < %s and SnpAll.Id = SnpPattern.SnpId AND"
           " SnpPattern." + col1 + " != SnpPattern." + col2 + " AND"
           " SnpAll.Exon='Y'")
    cursor.execute(sql, region)
    gene["snpCountmissel"] = len(cursor.fetchall())
    gene["hassnp"] = 'y' if gene["snpCountmissel"] > 0 else 'n'

    # indels starting inside the gene (end extended by .0010)
    cursor.execute("""
        SELECT
            distinct IndelAll.Name, IndelAll.Chromosome, IndelAll.SourceId, IndelAll.Mb_start,
            IndelAll.Mb_end, IndelAll.Strand, IndelAll.Type, IndelAll.Size, IndelAll.InDelSequence,
            SnpSource.Name
        from
            SnpSource, IndelAll
        where
            IndelAll.SpeciesId = '1' and IndelAll.Chromosome = %s AND
            IndelAll.Mb_start >= %s and IndelAll.Mb_start < (%s+.0010) AND
            SnpSource.Id = IndelAll.SourceId
        order by IndelAll.Mb_start
        """, region)
    gene["indelCountBXD"] = len(cursor.fetchall())
    gene["hasindel"] = 'y' if gene["indelCountBXD"] > 0 else 'n'


def _add_probeset_info(cursor, gene, suffix, database, isMouse):
    """Add the probe-set columns of one expression data set to *gene*.

    Keys are the _PROBESET_FIELDS stems plus *suffix* and 'probesetcis' +
    *suffix*.  When probe sets exist each key holds a list with one entry per
    probe set ('probesetcis' is '' if the cis query returns nothing).
    Otherwise (non-mouse species, or no probe set for the gene symbol) every
    key holds the placeholder "--".

    Returns True when probe sets were found.
    """
    symbol = _text(gene["GeneSymbol"])
    rows = ()
    if isMouse:
        cursor.execute(_PROBESET_SQL, (symbol, symbol, database))
        rows = cursor.fetchall()
    if not rows:
        for stem in _PROBESET_FIELDS + ('probesetcis',):
            gene[stem + suffix] = "--"
        return False

    for j, stem in enumerate(_PROBESET_FIELDS):
        gene[stem + suffix] = [row[j] for row in rows]

    cursor.execute(_PROBESET_CIS_SQL, (symbol, symbol, database))
    cisRows = cursor.fetchall()
    if cisRows:
        gene['dummy' + suffix + '2'] = [row[0] for row in cisRows]
        gene['probesetcis' + suffix] = [row[1] for row in cisRows]
    else:
        gene['probesetcis' + suffix] = ''
    return True


#Just return a list of dictionaries
#each dictionary contains sub-dictionary
def loadGenes(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse'):
    """Return the genes of *species* on *chrName* between *startMb* and *endMb*.

    Parameters:
        cursor    -- database cursor accepting ``%s``-style bound parameters.
        chrName   -- chromosome name, compared with GeneList.Chromosome.
        diffCol   -- pair of strain ids used for the BXD SNP count, or a false
                     value to skip SNP counting.
        startMb, endMb -- interval bounds; a gene is included when its TxStart
                     or TxEnd is > startMb and <= endMb.
        webqtlDb  -- unused; kept for call compatibility.
        species   -- name of the species to search (must exist in Species).

    Returns a list of dicts ordered by txStart.  Each dict has the GeneList
    columns, 'snpCount', 'snpDensity', 'GeneLength' (when computable) and one
    '<species>Gene' sub-dict per other species.

    Raises KeyError when *species* has no genes in GeneList.
    """
    speciesDict = _load_species_ids(cursor)
    speciesId = speciesDict[species]
    otherSpecies = _other_species(speciesDict, species)
    isMouse = species == 'mouse'

    GeneList = []
    for gene in _fetch_region_genes(cursor, speciesId, chrName, startMb, endMb):
        _add_snp_stats(cursor, gene, chrName, diffCol, isMouse)
        _add_gene_length(gene)
        _load_other_species_genes(cursor, gene, otherSpecies, diffCol)
        GeneList.append(gene)
    return GeneList


def loadGenesForQTLminer(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse', databaseA='HC_M2_0606_P', databaseB='HC_M2CB_1205_R', databaseC='Illum_LXS_Hipp_loess0807', str1='C57BL/6J', str2='DBA/2J'):
    """Like loadGenes, with the extra per-gene evidence used by QTLminer.

    Additional parameters:
        databaseA, databaseB, databaseC -- ProbeSetFreeze names of the three
                     expression data sets to report.
        str1, str2 -- SnpPattern column names of the two strains compared.

    Additional keys on every gene dict:
        pathways, pathwaynames, goterms -- lists, present only when found.
        snpCountall, snpCountBXD        -- always 0 (never filled in).
        snpCountmis, snpCountmissel, hassnp, indelCountBXD, hasindel
        probeset*/mean*/newlrs*/... for suffixes A, B and C (lists, or "--").
        hasexpr -- 'y' when any data set A mean is > 8.
        hascis  -- 'y' when any data set A cis entry is not 'no'.
        score   -- number of 'y' flags among hassnp, hasexpr, hasindel, hascis.

    Raises KeyError when *species* has no genes in GeneList.
    """
    speciesDict = _load_species_ids(cursor)
    speciesId = speciesDict[species]
    otherSpecies = _other_species(speciesDict, species)
    isMouse = species == 'mouse'

    GeneList = []
    for gene in _fetch_region_genes(cursor, speciesId, chrName, startMb, endMb):
        _add_annotations(cursor, gene)

        for key in ("snpCountall", "snpCountmis", "snpCountBXD", "snpCountmissel"):
            gene[key] = 0
        _add_snp_stats(cursor, gene, chrName, diffCol, isMouse)
        _add_gene_length(gene)
        _add_variant_counts(cursor, gene, str1, str2)

        # The expression and cis flags are derived from data set A only.
        gene["hasexpr"] = 'n'
        gene["hascis"] = 'n'
        if _add_probeset_info(cursor, gene, 'A', databaseA, isMouse):
            if any(mean is not None and mean > 8 for mean in gene["meanA"]):
                gene["hasexpr"] = 'y'
            if any(cis != 'no' for cis in gene["probesetcisA"]):
                gene["hascis"] = 'y'

        gene["score"] = sum(1 for flag in ("hassnp", "hasexpr", "hasindel", "hascis")
                            if gene[flag] == 'y')

        _add_probeset_info(cursor, gene, 'B', databaseB, isMouse)
        _add_probeset_info(cursor, gene, 'C', databaseC, isMouse)

        _load_other_species_genes(cursor, gene, otherSpecies, diffCol)
        GeneList.append(gene)
    return GeneList