about summary refs log tree commit diff
path: root/web/webqtl/snpBrowser/GeneAnnot.py
diff options
context:
space:
mode:
author root 2012-05-08 18:39:56 -0500
committer root 2012-05-08 18:39:56 -0500
commit ea46f42ee640928b92947bfb204c41a482d80937 (patch)
tree 9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/snpBrowser/GeneAnnot.py
parent 056b5253fc3857b0444382aa39944f6344dc1ceb (diff)
download genenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz
Add all the source codes into the github.
Diffstat (limited to 'web/webqtl/snpBrowser/GeneAnnot.py')
-rwxr-xr-x web/webqtl/snpBrowser/GeneAnnot.py 124
1 files changed, 124 insertions, 0 deletions
diff --git a/web/webqtl/snpBrowser/GeneAnnot.py b/web/webqtl/snpBrowser/GeneAnnot.py
new file mode 100755
index 00000000..5a889253
--- /dev/null
+++ b/web/webqtl/snpBrowser/GeneAnnot.py
@@ -0,0 +1,124 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+#########################################
+# A class holding the annotation of a single gene.
+# Each instance represents one gene.
+# It is used by the GeneListAnnot class.
+#########################################
+
+
+class GeneAnnot:
+    """Annotation of a single gene, built from one database query row.
+
+    The position passed to the match* methods is compared directly with
+    the tx*/cds* bounds, and is multiplied by 1000000 (giving base pairs)
+    before being compared with the exon bounds.
+    """
+    geneSymbol = None  # class-level defaults; __init__ overrides them
+    txStart = -1
+    txEnd = -1
+    Strand = ''
+    exon_start = []
+    exon_end = []
+    cdsStart = -1
+    cdsEnd = -1
+
+    _FLANK = 0.002          # up/downstream window ("2k") in matchTranscript
+    _STOP_CODON = 0.000003  # 3 bp, expressed in the units of pos
+    _SPLICE_BP = 2          # intronic bases next to an exon reported as splice
+
+    def __init__(self, query_result):
+        """Unpack one query row.
+
+        query_result is the 8-item sequence (geneSymbol, txStart, txEnd,
+        Strand, exonStart, exonEnd, cdsStart, cdsEnd); exonStart and
+        exonEnd are comma-separated strings of integers, e.g. "100,200,".
+        """
+        (self.geneSymbol, self.txStart, self.txEnd, self.Strand,
+         exonStart, exonEnd, self.cdsStart, self.cdsEnd) = query_result
+        # Give every instance its own lists so the class-level defaults
+        # are never shared between genes.
+        self.exon_start = []
+        self.exon_end = []
+        if exonStart and exonEnd:
+            self.exon_start = self._parse_positions(exonStart)
+            self.exon_end = self._parse_positions(exonEnd)
+
+    def _parse_positions(self, text):
+        """Return the integers of a comma-separated string, skipping blanks.
+
+        Skipping empty fields copes with the usual trailing comma without
+        dropping the last exon when that comma is absent.
+        """
+        return [int(field) for field in text.split(',') if field.strip()]
+
+    def matchTranscript(self, pos):
+        """Classify pos relative to the transcript bounds.
+
+        Returns [locus_type, distance] where locus_type is
+           1: inside txStart..txEnd (inclusive)
+           2: within the 2k window before txStart ("upstream")
+           3: within the 2k window after txEnd ("downstream")
+          -1: outside all of the above
+          -2: no data (txStart or txEnd is empty/zero)
+        distance is the gap to the transcript for types 2 and 3, else 0.
+        The strand is not consulted here.
+        """
+        if (not self.txStart) or (not self.txEnd):
+            return [-2, 0]
+        if self.txStart <= pos <= self.txEnd:
+            return [1, 0]
+        if self.txStart - self._FLANK < pos < self.txStart:
+            return [2, self.txStart - pos]
+        if self.txEnd < pos < self.txEnd + self._FLANK:
+            return [3, pos - self.txEnd]
+        return [-1, 0]
+
+    def matchDomain(self, pos):
+        """Locate pos within the exon/intron structure of the gene.
+
+        Returns [domain_type, function]. domain_type is "Exon N",
+        "5' UTR", "3' UTR", "Intron N", "Intron N; Splice" (intronic and
+        within 2 bp of an exon) or None when pos lies in no exon or
+        intron. N is counted in reverse order on the '-' strand.
+        function is "Stop Codon" when pos is an exonic position within
+        3 bp of the coding-region end (cdsEnd on '+', cdsStart on '-'),
+        otherwise None.
+        """
+        domain_type = None
+        function = None
+        num = len(self.exon_start)
+        minus = self.Strand == '-'
+        # Work in whole base pairs: the float product can land a hair
+        # below the integer (1.000001 * 1000000 is 1000000.9999999999)
+        # and would then miss the first base of an exon.
+        bp = int(round(pos * 1000000))
+
+        for i in range(num):
+            if self.exon_start[i] <= bp <= self.exon_end[i]:
+                number = i + 1
+                if minus:
+                    number = num - i
+                domain_type = "Exon %d" % number
+                # With CDS bounds known, an exonic site may instead be
+                # UTR, or fall on the stop codon.
+                if self.cdsStart and self.cdsEnd:
+                    if self.Strand == '+':
+                        if pos < self.cdsStart:
+                            domain_type = "5' UTR"
+                        elif pos > self.cdsEnd:
+                            domain_type = "3' UTR"
+                        elif pos > self.cdsEnd - self._STOP_CODON:
+                            function = "Stop Codon"
+                    elif minus:
+                        if pos < self.cdsStart:
+                            domain_type = "3' UTR"
+                        elif pos > self.cdsEnd:
+                            domain_type = "5' UTR"
+                        elif pos < self.cdsStart + self._STOP_CODON:
+                            function = "Stop Codon"
+
+        if not domain_type:
+            # Intron i is the gap between exon i-1 and exon i (0-based).
+            # Both splice sites and the interior share one number.
+            for i in range(1, num):
+                prev_end = self.exon_end[i - 1]
+                next_start = self.exon_start[i]
+                if prev_end < bp < next_start:
+                    number = i
+                    if minus:
+                        number = num - i
+                    domain_type = "Intron %d" % number
+                    if (bp <= prev_end + self._SPLICE_BP
+                            or bp >= next_start - self._SPLICE_BP):
+                        domain_type += "; Splice"
+
+        return [domain_type, function]
+