# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by GeneNetwork Core Team 2010/10/20
#########################################
# A class holding the annotation of a single gene.
# Each instance represents one gene;
# it is used by the GeneListAnnot class.
#########################################
class GeneAnnot:
    """Annotation of a single gene, built from one database query row.

    Two coordinate units are used, as the comparisons in this class show:
    txStart/txEnd/cdsStart/cdsEnd are compared directly with the ``pos``
    argument of the match methods (megabases: a 0.002 window is described
    as "2k"), whereas exon boundaries are compared with ``pos * 1000000``
    (base pairs).
    """

    # Class-level defaults, kept for backward compatibility with code that
    # reads them from the class. __init__ always gives every instance its own
    # exon lists, so the two mutable defaults are never shared by instances.
    geneSymbol = None
    txStart = -1
    txEnd = -1
    Strand = ''
    exon_start = []
    exon_end = []
    cdsStart = -1
    cdsEnd = -1

    def __init__(self, query_result):
        """Populate the annotation from a query row.

        query_result -- an 8-item sequence:
            (geneSymbol, txStart, txEnd, Strand,
             exonStart, exonEnd, cdsStart, cdsEnd)
        where exonStart/exonEnd are comma-separated lists of integers
        (a trailing comma is accepted) or a false value when unknown.
        """
        (self.geneSymbol, self.txStart, self.txEnd, self.Strand,
         exonStart, exonEnd, self.cdsStart, self.cdsEnd) = query_result

        # Always bind fresh per-instance lists.
        self.exon_start = []
        self.exon_end = []
        if exonStart and exonEnd:
            self.exon_start = self._parse_positions(exonStart)
            self.exon_end = self._parse_positions(exonEnd)

    @staticmethod
    def _parse_positions(field):
        """Turn a comma-separated string of integers into a list of ints.

        Empty tokens are skipped, so "1,2," and "1,2" both give [1, 2];
        the last exon is not lost when the trailing comma is missing.
        """
        return [int(token) for token in field.split(',') if token.strip()]

    def matchTranscript(self, pos):
        """Classify ``pos`` relative to the transcript span.

        Returns [locus_type, distance] where locus_type is
             1: inside txStart..txEnd (inclusive)
             2: less than 0.002 before txStart ("2k upstream")
             3: less than 0.002 after txEnd ("2k downstream")
            -1: outside all of the above
            -2: no data (txStart or txEnd is false)
        and distance is the gap to the transcript for types 2 and 3,
        otherwise 0.
        """
        locus_type = -1
        distance = 0

        if (not self.txStart) or (not self.txEnd):  # no data
            locus_type = -2
        elif self.txStart <= pos <= self.txEnd:
            locus_type = 1
        elif self.txStart - 0.002 < pos < self.txStart:
            locus_type = 2
            distance = self.txStart - pos
        elif self.txEnd < pos < self.txEnd + 0.002:
            locus_type = 3
            distance = pos - self.txEnd

        return [locus_type, distance]

    def _exon_number(self, index):
        """Exon number for the exon at 0-based list position ``index``.

        Exons are counted from the other end of the list on the '-' strand.
        """
        if self.Strand == '-':
            return len(self.exon_start) - index
        return index + 1

    def _intron_number(self, index):
        """Number of the intron between list exons ``index - 1`` and ``index``.

        Counted from the other end of the list on the '-' strand.
        """
        if self.Strand == '-':
            return len(self.exon_start) - index
        return index

    def matchDomain(self, pos):
        """Classify ``pos`` against the exon/intron structure of the gene.

        Returns [domain_type, function]:
            domain_type -- "Exon N", "5' UTR", "3' UTR",
                           "Intron N; Splice" (within 2 bases of an exon),
                           "Intron N", or None when nothing matches.
            function    -- "Stop Codon" when the position is in the last
                           0.000003 of the coding region on the strand's
                           3' side, otherwise None.
        """
        domain_type = None
        function = None
        num = len(self.exon_start)

        # Exon boundaries are integers; round so that floating-point error in
        # pos * 1000000 cannot push a boundary base out of its exon.
        bp = int(round(pos * 1000000))

        for i in range(num):
            if self.exon_start[i] <= bp <= self.exon_end[i]:
                domain_type = "Exon %d" % self._exon_number(i)
                # With a known coding region, an exonic site may instead be
                # UTR, or belong to the stop codon.
                if self.cdsStart and self.cdsEnd:
                    if self.Strand == '+':
                        if pos < self.cdsStart:
                            domain_type = "5' UTR"
                        elif pos > self.cdsEnd:
                            domain_type = "3' UTR"
                        elif pos > self.cdsEnd - 0.000003:
                            function = "Stop Codon"
                    elif self.Strand == '-':
                        if pos < self.cdsStart:
                            domain_type = "3' UTR"
                        elif pos > self.cdsEnd:
                            domain_type = "5' UTR"
                        elif pos < self.cdsStart + 0.000003:
                            function = "Stop Codon"

        if not domain_type:
            # Splice site: first two intronic bases after an exon end.
            for i in range(1, num):
                prev_end = self.exon_end[i - 1]
                if prev_end < bp <= prev_end + 2:
                    domain_type = "Intron %d; Splice" % self._intron_number(i)

        if not domain_type:
            # Splice site: last two intronic bases before an exon start.
            # Uses the same intron number as the other two intron passes.
            for i in range(1, num):
                next_start = self.exon_start[i]
                if next_start - 2 <= bp < next_start:
                    domain_type = "Intron %d; Splice" % self._intron_number(i)

        if not domain_type:
            # Anywhere else strictly between two neighbouring exons.
            for i in range(1, num):
                if self.exon_end[i - 1] < bp < self.exon_start[i]:
                    domain_type = "Intron %d" % self._intron_number(i)

        return [domain_type, function]
# End of the GeneAnnot class definition.