aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/intervalAnalyst/GeneUtil.py
blob: 43008ecfe8765d95a074537431932bfe8370c11a (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by GeneNetwork Core Team 2010/10/20

import string

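# Return a list of dictionaries, one per gene found in the requested interval.
# Each gene dictionary also holds one sub-dictionary per other species,
# stored under the key '<species name>Gene'.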
def loadGenes(cursor, chrName, diffCol, startMb, endMb, webqtlDb =None, species='mouse'):
	"""Load the genes of one species that fall inside a chromosome interval.

	A gene is selected when its TxStart or its TxEnd lies in the half-open
	range (startMb, endMb] on chromosome chrName. For every selected gene
	the same GeneSymbol is also looked up in each other species present in
	GeneList (first match only).

	Parameters:
		cursor   -- DB-API cursor used for all queries.
		chrName  -- chromosome name matched against GeneList.Chromosome.
		diffCol  -- pair of strain ids (StrainId1, StrainId2) used to count
		            rows in BXDSnpPosition; a falsy value disables counting.
		startMb, endMb -- interval bounds compared against TxStart / TxEnd.
		webqtlDb -- unused by this function; kept for caller compatibility.
		species  -- name of the reference species (a key of Species.Name).

	Returns:
		A list of dictionaries ordered by txStart. Each dictionary maps the
		fetched GeneList columns to their values and additionally carries
		'snpCount', 'snpDensity', 'GeneLength' (omitted when the span cannot
		be computed) and one '<otherSpecies>Gene' entry per other species.
		That entry has the same layout, or is an empty dictionary when the
		other species has no gene with that symbol.

	Raises:
		KeyError -- if species does not occur in the GeneList table.

	NOTE(review): values are passed to cursor.execute() as bound
	parameters with '%s' placeholders (DB-API 'format' paramstyle, as used
	by MySQLdb) -- confirm the cursor handed in comes from such a driver.
	"""
	fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd',
	'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
	'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
	# Column names are fixed identifiers defined above, so they are safe to
	# splice into the SQL text; everything else is bound as a parameter.
	fieldList = ", ".join(fetchFields)

	##List All Species in the Gene Table
	cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where "
			"GeneList.SpeciesId = Species.Id group by GeneList.SpeciesId")
	speciesDict = {}
	for row in cursor.fetchall():
		speciesDict[row[0]] = row[1]

	##List current Species and other Species
	speciesId = speciesDict[species]
	otherSpecies = [(name, speciesDict[name]) for name in speciesDict if name != species]

	cursor.execute("SELECT " + fieldList + """ from GeneList
						where
					SpeciesId = %s AND Chromosome = %s AND
					((TxStart > %s and TxStart <= %s) OR (TxEnd > %s and TxEnd <= %s))
					order by txStart
					""",
					(speciesId, chrName, startMb, endMb, startMb, endMb))
	results = cursor.fetchall()

	orthologQuery = "SELECT " + fieldList + " from GeneList where SpeciesId = %s and geneSymbol= %s limit 1"

	GeneList = []
	for result in results or ():
		gene = dict(zip(fetchFields, result))
		#count SNPs if possible
		_addSnpStats(cursor, gene, diffCol, species == 'mouse')
		_addGeneLength(gene)

		#load gene from other Species by the same name
		for othSpec, othSpecId in otherSpecies:
			othGene = {}
			cursor.execute(orthologQuery, (othSpecId, gene["GeneSymbol"]))
			othRow = cursor.fetchone()
			if othRow:
				othGene = dict(zip(fetchFields, othRow))
				# SNPs are counted over the ortholog's own chromosome
				# and span, so that count and density refer to the
				# same region.
				_addSnpStats(cursor, othGene, diffCol, othSpec == 'mouse')
				_addGeneLength(othGene)
			gene['%sGene' % othSpec] = othGene

		GeneList.append(gene)

	return GeneList


def _addSnpStats(cursor, gene, diffCol, isMouse):
	"""Store 'snpCount' and 'snpDensity' in gene, counting rows of BXDSnpPosition inside the gene span."""
	gene["snpDensity"] = gene["snpCount"] = 0
	if not (diffCol and isMouse):
		return
	txStart = gene["TxStart"]
	txEnd = gene["TxEnd"]
	if txStart is None or txEnd is None:
		# No usable span: nothing to count against.
		return
	cursor.execute("""
		select
			count(*) from BXDSnpPosition
		where
			Chr = %s AND Mb >= %s AND Mb < %s AND
			StrainId1 = %s AND StrainId2 = %s
		""", (gene["Chromosome"], txStart, txEnd, diffCol[0], diffCol[1]))
	gene["snpCount"] = cursor.fetchone()[0]
	try:
		gene["snpDensity"] = gene["snpCount"]/(txEnd-txStart)/1000.0
	except ZeroDivisionError:
		# Zero-length span: density is undefined, keep the default of 0.
		pass


def _addGeneLength(gene):
	"""Store 'GeneLength' (1000.0 times the TxStart..TxEnd span) in gene when both ends are numeric."""
	try:
		gene['GeneLength'] = 1000.0*(gene['TxEnd'] - gene['TxStart'])
	except TypeError:
		# A missing (None) coordinate: leave 'GeneLength' unset.
		pass