about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-xweb/webqtl/search/SearchResultPage.py6
-rwxr-xr-xwqflask/base/data_set.py556
-rwxr-xr-xwqflask/base/webqtlTrait.py25
-rwxr-xr-xwqflask/dbFunction/webqtlDatabaseFunction.py23
-rw-r--r--wqflask/wqflask/correlation/CorrelationPage.py4
-rw-r--r--wqflask/wqflask/do_search.py22
-rw-r--r--wqflask/wqflask/search_results.py325
7 files changed, 514 insertions, 447 deletions
diff --git a/web/webqtl/search/SearchResultPage.py b/web/webqtl/search/SearchResultPage.py
index 029a54c4..d62bb449 100755
--- a/web/webqtl/search/SearchResultPage.py
+++ b/web/webqtl/search/SearchResultPage.py
@@ -14,7 +14,7 @@ from htmlgen import HTMLgen2 as HT
 from base import webqtlConfig
 from utility.THCell import THCell
 from utility.TDCell import TDCell
-from base.webqtlDataset import webqtlDataset
+from base.data_set import DataSet
 from base.webqtlTrait import webqtlTrait
 from base.templatePage import templatePage
 from utility import webqtlUtil
@@ -65,12 +65,12 @@ class SearchResultPage(templatePage):
 				InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
 				PublishFreeze.InbredSetId""")
 			results = self.cursor.fetchall()
-			self.database = map(lambda x: webqtlDataset(x[0], self.cursor), results)
+			self.database = map(lambda x: DataSet(x[0], self.cursor), results)
 			self.databaseCrosses = map(lambda x: x[1], results)
 			self.databaseCrossIds = map(lambda x: x[2], results)
 			self.singleCross = False
 		else:
-			self.database = map(lambda x: webqtlDataset(x, self.cursor), self.database)
+			self.database = map(lambda x: DataSet(x, self.cursor), self.database)
 			#currently, webqtl wouldn't allow multiple crosses
 			#for other than multiple publish db search
 			#so we can use the first database as example
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 992c673e..9e3e6d81 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -19,64 +19,64 @@
 #
 #
 # This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
+
+from __future__ import print_function, division
 
 from htmlgen import HTMLgen2 as HT
 
 import webqtlConfig
 
+from pprint import pformat as pf
 
+# Used by create_dataset to instantiate objects
+DS_NAME_MAP = {}
+
+def create_dataset(db_conn, dataset_name):
+    cursor = db_conn.cursor()
+    cursor.execute("""
+        SELECT DBType.Name
+        FROM DBList, DBType
+        WHERE DBList.Name = %s and
+              DBType.Id = DBList.DBTypeId
+        """, (dataset_name))
+    print("dataset_name:", dataset_name)
+    dataset_type = cursor.fetchone()[0]
+    print("dataset_type:", pf(dataset_type))
+    
+    dataset_ob = DS_NAME_MAP[dataset_type]
+    #dataset_class = getattr(data_set, dataset_ob)
+    
+    print("DS_NAME_MAP:", pf(DS_NAME_MAP))
+    
+    dataset_class = globals()[dataset_ob]
+    return dataset_class(dataset_name, db_conn)
 
 class DataSet(object):
     """
-    Dataset class defines a dataset in webqtl, can be either Microarray,
+    DataSet class defines a dataset in webqtl, can be either Microarray,
     Published phenotype, genotype, or user input dataset(temp)
 
     """
 
-    def __init__(self, dbName, cursor=None):
-
-        assert dbName
-        self.id = 0
-        self.name = ''
-        self.type = ''
-        self.group = ''
-        self.cursor = cursor
-
-        #temporary storage
-        if dbName.find('Temp') >= 0:
-            self.searchfield = ['name','description']
-            self.disfield = ['name','description']
-            self.type = 'Temp'
-            self.id = 1
-            self.fullname = 'Temporary Storage'
-            self.shortname = 'Temp'
-        elif dbName.find('Publish') >= 0:
-            pass
-        elif dbName.find('Geno') >= 0:
-            self.searchfield = ['name','chr']
-            self.disfield = ['name','chr','mb', 'source2', 'sequence']
-            self.type = 'Geno'
-        else: #ProbeSet
-            self.searchfield = ['name','description','probe_target_description',
-                    'symbol','alias','genbankid','unigeneid','omim',
-                    'refseq_transcriptid','probe_set_specificity', 'probe_set_blat_score']
-            self.disfield = ['name','symbol','description','probe_target_description',
-                    'chr','mb','alias','geneid','genbankid', 'unigeneid', 'omim',
-                    'refseq_transcriptid','blatseq','targetseq','chipid', 'comments',
-                    'strand_probe','strand_gene','probe_set_target_region',
-                    'probe_set_specificity', 'probe_set_blat_score','probe_set_blat_mb_start',
-                    'probe_set_blat_mb_end', 'probe_set_strand',
-                    'probe_set_note_by_rw', 'flag']
-            self.type = 'ProbeSet'
-        self.name = dbName
-        if self.cursor and self.id == 0:
-            self.retrieveName()
-            
-    
+    def __init__(self, name, db_conn):
+
+        assert name
+        self.name = name
+        self.db_conn = db_conn
+        self.cursor = self.db_conn.cursor()
+        self.id = None
+        self.type = None
+        self.group = None
+
+        #if self.cursor and self.id == 0:
+        self.setup()
+        
+        self.check_confidentiality()
+        
+        self.retrieve_name()
+        self.get_group()
+        
+
     # Delete this eventually
     @property
     def riset():
@@ -85,8 +85,93 @@ class DataSet(object):
 
     def get_group(self):
         assert self.cursor
-        if self.type == 'Publish':
-            query = '''
+        self.cursor.execute(self.query)
+        self.group, self.group_id = self.cursor.fetchone()
+        if self.group == 'BXD300':
+            self.group = "BXD"
+        #return group
+
+
+    def retrieve_name(self):
+        """
+        If the data set name parameter is not found in the 'Name' field of the data set table,
+        check if it is actually the FullName or ShortName instead.
+        
+        This is not meant to retrieve the data set info if no name at all is passed.
+        
+        """
+
+        query_args = tuple(self.db_conn.escape_string(x) for x in (
+            (self.type + "Freeze"),
+            str(webqtlConfig.PUBLICTHRESH),
+            self.name,
+            self.name,
+            self.name))
+        print("query_args are:", query_args)
+        
+        query = '''
+                SELECT
+                        Id, Name, FullName, ShortName
+                FROM
+                        %s
+                WHERE
+                        public > %s AND
+                        (Name = "%s" OR FullName = "%s" OR ShortName = "%s")
+          ''' % (query_args)
+            
+        self.cursor.execute(query)
+        self.id, self.name, self.fullname, self.shortname = self.cursor.fetchone()
+
+
+    #def genHTML(self, Class='c0dd'):
+    #    return  HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class),
+    #            url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank")
+
+class PhenotypeDataSet(DataSet):
+    DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
+
+    def setup(self):
+        # Fields in the database table
+        self.search_fields = ['Phenotype.Post_publication_description',
+                            'Phenotype.Pre_publication_description',
+                            'Phenotype.Pre_publication_abbreviation',
+                            'Phenotype.Post_publication_abbreviation',
+                            'Phenotype.Lab_code',
+                            'Publication.PubMed_ID',
+                            'Publication.Abstract',
+                            'Publication.Title',
+                            'Publication.Authors',
+                            'PublishXRef.Id']
+        
+        # Figure out what display_fields is
+        self.display_fields = ['name',
+                               'pubmed_id',
+                               'pre_publication_description',
+                               'post_publication_description',
+                               'original_description',
+                               'pre_publication_abbreviation',
+                               'post_publication_abbreviation',
+                               'lab_code',
+                               'submitter', 'owner',
+                               'authorized_users',
+                               'authors', 'title',
+                               'abstract', 'journal',
+                               'volume', 'pages',
+                               'month', 'year',
+                               'sequence', 'units', 'comments']
+
+        # Fields displayed in the search results table header
+        self.header_fields = ['',
+                            'ID',
+                            'Description',
+                            'Authors',
+                            'Year',
+                            'Max LRS',
+                            'Max LRS Location']        
+
+        self.type = 'Publish'
+        
+        self.query = '''
                             SELECT
                                     InbredSet.Name, InbredSet.Id
                             FROM
@@ -94,69 +179,336 @@ class DataSet(object):
                             WHERE
                                     PublishFreeze.InbredSetId = InbredSet.Id AND
                                     PublishFreeze.Name = "%s"
-                    ''' % self.name
-        elif self.type == 'Geno':
-            query = '''
-                            SELECT
-                                    InbredSet.Name, InbredSet.Id
-                            FROM
-                                    InbredSet, GenoFreeze
-                            WHERE
-                                    GenoFreeze.InbredSetId = InbredSet.Id AND
-                                    GenoFreeze.Name = "%s"
-                    ''' % self.name
-        elif self.type == 'ProbeSet':
-            query = '''
-                            SELECT
-                                    InbredSet.Name, InbredSet.Id
-                            FROM
-                                    InbredSet, ProbeSetFreeze, ProbeFreeze
-                            WHERE
-                                    ProbeFreeze.InbredSetId = InbredSet.Id AND
-                                    ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
-                                    ProbeSetFreeze.Name = "%s"
-                    ''' % self.name
-        else:
-            return ""
-        self.cursor.execute(query)
-        group, RIID = self.cursor.fetchone()
-        if group == 'BXD300':
-            group = "BXD"
-        self.group = group
-        self.group_id = RIID
-        return group
+                    ''' % self.db_conn.escape_string(self.name)
+                    
+    def check_confidentiality(self):
+        # (Urgently?) Need to write this
+        pass
+    
+    def get_trait_info(self, trait_list, species = ''):
+        for this_trait in trait_list:
+            if not this_trait.haveinfo:
+                this_trait.retrieveInfo(QTL=1)
+
+            description = this_trait.post_publication_description
+            if this_trait.confidential:
+                if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
+                    description = this_trait.pre_publication_description
+            this_trait.description_display = description
+
+            if not this_trait.year.isdigit():
+                this_trait.pubmed_text = "N/A"
+
+            if this_trait.pubmed_id:
+                this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id
+
+            #LRS and its location
+            this_trait.LRS_score_repr = "N/A"
+            this_trait.LRS_score_value = 0
+            this_trait.LRS_location_repr = "N/A"
+            this_trait.LRS_location_value = 1000000
+
+            if this_trait.lrs:
+                self.cursor.execute("""
+                    select Geno.Chr, Geno.Mb from Geno, Species
+                    where Species.Name = '%s' and
+                        Geno.Name = '%s' and
+                        Geno.SpeciesId = Species.Id
+                """ % (species, this_trait.locus))
+                result = self.cursor.fetchone()
 
+                if result:
+                    if result[0] and result[1]:
+                        LRS_Chr = result[0]
+                        LRS_Mb = result[1]
 
-    def retrieveName(self):
-        assert self.id == 0 and self.cursor
+                        #XZ: LRS_location_value is used for sorting
+                        try:
+                            LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
+                        except:
+                            if LRS_Chr.upper() == 'X':
+                                LRS_location_value = 20*1000 + float(LRS_Mb)
+                            else:
+                                LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
+
+                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
+                        this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
+                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )    
+        
+class GenotypeDataSet(DataSet):
+    DS_NAME_MAP['Geno'] = 'GenotypeDataSet'
+    
+    def setup(self):
+        # Fields in the database table
+        self.search_fields = ['Name',
+                              'Chr']
+        
+        # Find out what display_fields is
+        self.display_fields = ['name',
+                               'chr',
+                               'mb',
+                               'source2',
+                               'sequence']
+        
+        # Fields displayed in the search results table header
+        self.header_fields = ['',
+                              'ID',
+                              'Location']        
+        
+        # Todo: Obsolete or rename this field
+        self.type = 'Geno'
+        
         query = '''
                 SELECT
-                        Id, Name, FullName, ShortName
+                        InbredSet.Name, InbredSet.Id
                 FROM
-                        %sFreeze
+                        InbredSet, GenoFreeze
                 WHERE
-                        public > %d AND
-                        (Name = "%s" OR FullName = "%s" OR ShortName = "%s")
-          '''% (self.type, webqtlConfig.PUBLICTHRESH, self.name, self.name, self.name)
-        try:
+                        GenoFreeze.InbredSetId = InbredSet.Id AND
+                        GenoFreeze.Name = "%s"
+                ''' % self.db_conn.escape_string(self.name)
+                
+    def check_confidentiality(self):
+        return geno_mrna_confidentiality(self)
+    
+    def get_trait_info(self, trait_list):
+        for this_trait in trait_list:
+            if not this_trait.haveinfo:
+                this_trait.retrieveInfo()
+
+            #XZ: trait_location_value is used for sorting
+            trait_location_repr = 'N/A'
+            trait_location_value = 1000000
+
+            if this_trait.chr and this_trait.mb:
+                try:
+                    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
+                except:
+                    if this_trait.chr.upper() == 'X':
+                        trait_location_value = 20*1000 + this_trait.mb
+                    else:
+                        trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
+
+                this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
+                this_trait.location_value = trait_location_value    
+    
+                
+class MrnaAssayDataSet(DataSet):
+    '''
+    An mRNA Assay is a quantitative assessment (assay) associated with an mRNA trait
+    
+    This used to be called ProbeSet, but that term only refers specifically to the Affymetrix
+    platform and is far too specific.
+    
+    '''
+    DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet'
+
+    def setup(self):
+        # Fields in the database table
+        self.search_fields = ['Name',
+                              'Description',
+                              'Probe_Target_Description',
+                              'Symbol',
+                              'Alias',
+                              'GenbankId',
+                              'UniGeneId',
+                              'RefSeq_TranscriptId']
+
+        # Find out what display_fields is
+        self.display_fields = ['name', 'symbol',
+                               'description', 'probe_target_description',
+                               'chr', 'mb',
+                               'alias', 'geneid',
+                               'genbankid', 'unigeneid',
+                               'omim', 'refseq_transcriptid',
+                               'blatseq', 'targetseq',
+                               'chipid', 'comments',
+                               'strand_probe', 'strand_gene',
+                               'probe_set_target_region',
+                               'probe_set_specificity',
+                               'probe_set_blat_score',
+                               'probe_set_blat_mb_start',
+                               'probe_set_blat_mb_end',
+                               'probe_set_strand',
+                               'probe_set_note_by_rw',
+                               'flag']
+
+        # Fields displayed in the search results table header
+        self.header_fields = ['',
+                             'ID',
+                             'Symbol',
+                             'Description',
+                             'Location',
+                             'Mean Expr',
+                             'Max LRS',
+                             'Max LRS Location']       
+
+        # Todo: Obsolete or rename this field
+        self.type = 'ProbeSet'
+
+        self.query = '''
+                        SELECT
+                                InbredSet.Name, InbredSet.Id
+                        FROM
+                                InbredSet, ProbeSetFreeze, ProbeFreeze
+                        WHERE
+                                ProbeFreeze.InbredSetId = InbredSet.Id AND
+                                ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
+                                ProbeSetFreeze.Name = "%s"
+                ''' % self.db_conn.escape_string(self.name)
+
+
+    def check_confidentiality(self):
+        return geno_mrna_confidentiality(self)
+    
+    def get_trait_info(self, trait_list=None, species=''):
+
+        #  Note: setting trait_list to [] is probably not a great idea.
+        if not trait_list:
+            trait_list = []
+
+        for this_trait in trait_list:
+
+            if not this_trait.haveinfo:
+                this_trait.retrieveInfo(QTL=1)
+
+            if this_trait.symbol:
+                pass
+            else:
+                this_trait.symbol = "N/A"
+
+            #XZ, 12/08/2008: description
+            #XZ, 06/05/2009: Rob asked to add probe target description
+            description_string = str(this_trait.description).strip()
+            target_string = str(this_trait.probe_target_description).strip()
+
+            description_display = ''
+
+            if len(description_string) > 1 and description_string != 'None':
+                description_display = description_string
+            else:
+                description_display = this_trait.symbol
+
+            if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None':
+                description_display = description_display + '; ' + target_string.strip()
+
+            # Save it for the jinja2 template
+            this_trait.description_display = description_display
+
+            #XZ: trait_location_value is used for sorting
+            trait_location_repr = 'N/A'
+            trait_location_value = 1000000
+
+            if this_trait.chr and this_trait.mb:
+                try:
+                    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
+                except:
+                    if this_trait.chr.upper() == 'X':
+                        trait_location_value = 20*1000 + this_trait.mb
+                    else:
+                        trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
+
+                this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) )
+                this_trait.location_value = trait_location_value
+                #this_trait.trait_location_value = trait_location_value
+
+            #XZ, 01/12/08: This SQL query is much faster.
+            query = (
+"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
+    where ProbeSetXRef.ProbeSetFreezeId = %s and
+    ProbeSet.Id = ProbeSetXRef.ProbeSetId and
+    ProbeSet.Name = '%s'
+            """ % (self.db_conn.escape_string(str(this_trait.db.id)),
+                   self.db_conn.escape_string(this_trait.name)))
+
+            print("query is:", pf(query))
+            
             self.cursor.execute(query)
-            self.id,self.name,self.fullname,self.shortname=self.cursor.fetchone()
-        except:
-            raise KeyError, `self.name`+' doesn\'t exist.'
+            result = self.cursor.fetchone()
 
+            if result:
+                if result[0]:
+                    mean = result[0]
+                else:
+                    mean=0
+            else:
+                mean = 0
 
-    def genHTML(self, Class='c0dd'):
-        return  HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class),
-                url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank")
+            #XZ, 06/05/2009: It is necessary to turn on nowrap
+            this_trait.mean = repr = "%2.3f" % mean
 
-class PhenotypeDataSet(DataSet):
+            #LRS and its location
+            this_trait.LRS_score_repr = 'N/A'
+            this_trait.LRS_score_value = 0
+            this_trait.LRS_location_repr = 'N/A'
+            this_trait.LRS_location_value = 1000000
+
+            #Max LRS and its Locus location
+            if this_trait.lrs and this_trait.locus:
+                self.cursor.execute("""
+                    select Geno.Chr, Geno.Mb from Geno, Species
+                    where Species.Name = '%s' and
+                        Geno.Name = '%s' and
+                        Geno.SpeciesId = Species.Id
+                """ % (species, this_trait.locus))
+                result = self.cursor.fetchone()
+
+                if result:
+                    if result[0] and result[1]:
+                        LRS_Chr = result[0]
+                        LRS_Mb = result[1]
+
+                        #XZ: LRS_location_value is used for sorting
+                        try:
+                            LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
+                        except:
+                            if LRS_Chr.upper() == 'X':
+                                LRS_location_value = 20*1000 + float(LRS_Mb)
+                            else:
+                                LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
+
+                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
+                        this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
+                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )    
+
+
+class TempDataSet(DataSet):
+    '''Temporary user-generated data set'''
     
-    def __init__(self):
-        self.searchfield = ['name','post_publication_description','abstract','title','authors']
-        self.disfield = ['name','pubmed_id',
-                            'pre_publication_description', 'post_publication_description', 'original_description',
-                            'pre_publication_abbreviation', 'post_publication_abbreviation',
-                            'lab_code', 'submitter', 'owner', 'authorized_users',
-                            'authors','title','abstract', 'journal','volume','pages','month',
-                            'year','sequence', 'units', 'comments']
-        self.type = 'Publish'
\ No newline at end of file
+    def setup(self):
+        self.search_fields = ['name',
+                              'description']
+        
+        self.display_fields = ['name',
+                               'description']
+        
+        self.header_fields = ['Name',
+                              'Description']
+        
+        self.type = 'Temp'
+        
+        # Need to double check later how these are used
+        self.id = 1
+        self.fullname = 'Temporary Storage'
+        self.shortname = 'Temp'
+        
+        
+def geno_mrna_confidentiality(ob):
+    dataset_table = ob.type + "Freeze"
+    print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
+
+    query = '''SELECT Id, Name, FullName, confidentiality,
+                        AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table)
+
+    ob.cursor.execute(query, ob.name)
+
+    (dataset_id,
+     name,
+     full_name,
+     confidential,
+     authorized_users) = ob.cursor.fetchall()[0]
+
+    if confidential:
+        # Allow confidential data later
+        NoConfindetialDataForYouTodaySorry
+    
\ No newline at end of file
diff --git a/wqflask/base/webqtlTrait.py b/wqflask/base/webqtlTrait.py
index 51d36ab2..29087721 100755
--- a/wqflask/base/webqtlTrait.py
+++ b/wqflask/base/webqtlTrait.py
@@ -6,7 +6,7 @@ from htmlgen import HTMLgen2 as HT
 
 import webqtlConfig
 from webqtlCaseData import webqtlCaseData
-from webqtlDataset import webqtlDataset
+from data_set import create_dataset
 from dbFunction import webqtlDatabaseFunction
 from utility import webqtlUtil
 
@@ -20,9 +20,10 @@ class webqtlTrait:
 
     """
 
-    def __init__(self, cursor = None, **kw):
+    def __init__(self, db_conn, **kw):
         print("in webqtlTrait")
-        self.cursor = cursor
+        self.db_conn = db_conn
+        self.cursor = self.db_conn.cursor()
         self.db = None                  # database object
         self.name = ''                  # Trait ID, ProbeSet ID, Published ID, etc.
         self.cellid = ''
@@ -50,7 +51,7 @@ class webqtlTrait:
 
         if self.db and isinstance(self.db, basestring):
             assert self.cursor, "Don't have a cursor"
-            self.db = webqtlDataset(self.db, self.cursor)
+            self.db = create_dataset(self.db_conn, self.db)
 
         #if self.db == None, not from a database
         print("self.db is:", self.db, type(self.db))
@@ -396,8 +397,8 @@ class webqtlTrait:
         #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
         #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
         elif self.db.type == 'ProbeSet':
-            disfieldString = string.join(self.db.disfield,',ProbeSet.')
-            disfieldString = 'ProbeSet.' + disfieldString
+            display_fields_string = ',ProbeSet.'.join(self.db.display_fields)
+            display_fields_string = 'ProbeSet.' + display_fields_string
             query = """
                     SELECT %s
                     FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
@@ -406,12 +407,12 @@ class webqtlTrait:
                             ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                             ProbeSetFreeze.Name = '%s' AND
                             ProbeSet.Name = '%s'
-                    """ % (disfieldString, self.db.name, self.name)
+                    """ % (display_fields_string, self.db.name, self.name)
         #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
         # to avoid the problem of same marker name from different species.
         elif self.db.type == 'Geno':
-            disfieldString = string.join(self.db.disfield,',Geno.')
-            disfieldString = 'Geno.' + disfieldString
+            display_fields_string = string.join(self.db.display_fields,',Geno.')
+            display_fields_string = 'Geno.' + display_fields_string
             query = """
                     SELECT %s
                     FROM Geno, GenoFreeze, GenoXRef
@@ -420,10 +421,10 @@ class webqtlTrait:
                             GenoXRef.GenoId = Geno.Id AND
                             GenoFreeze.Name = '%s' AND
                             Geno.Name = '%s'
-                    """ % (disfieldString, self.db.name, self.name)
+                    """ % (display_fields_string, self.db.name, self.name)
         else: #Temp type
             query = 'SELECT %s FROM %s WHERE Name = "%s"' % \
-                    (string.join(self.db.disfield,','), self.db.type, self.name)
+                    (string.join(self.db.display_fields,','), self.db.type, self.name)
 
 
         self.cursor.execute(query)
@@ -432,7 +433,7 @@ class webqtlTrait:
             self.haveinfo = 1
 
             #XZ: assign SQL query result to trait attributes.
-            for i, field in enumerate(self.db.disfield):
+            for i, field in enumerate(self.db.display_fields):
                 setattr(self, field, traitInfo[i])
 
             if self.db.type == 'Publish':
diff --git a/wqflask/dbFunction/webqtlDatabaseFunction.py b/wqflask/dbFunction/webqtlDatabaseFunction.py
index 7e33da3f..8f923b8a 100755
--- a/wqflask/dbFunction/webqtlDatabaseFunction.py
+++ b/wqflask/dbFunction/webqtlDatabaseFunction.py
@@ -19,14 +19,7 @@
 #
 #
 # This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by Xiaodong Zhou 2011/Jan/20
 
-#webqtlDatabaseFunction.py
-#
-#This file consists of various database related functions; the names are generally self-explanatory.
 
 import MySQLdb
 import string
@@ -206,21 +199,21 @@ def getTissueCountByTissueProbeSetFreezeId(cursor=None, TissueProbeSetFreezeId=N
 
 ###########################################################################
 # input: cursor, TissueProbeSetFreezeId (int)
-# output: DatasetName(string),DatasetFullName(string)
-# function: retrieve DatasetName, DatasetFullName based on TissueProbeSetFreezeId
+# output: DataSetName(string),DataSetFullName(string)
+# function: retrieve DataSetName, DataSetFullName based on TissueProbeSetFreezeId
 ###########################################################################
-def getDatasetNamesByTissueProbeSetFreezeId(cursor=None, TissueProbeSetFreezeId=None):
+def getDataSetNamesByTissueProbeSetFreezeId(cursor=None, TissueProbeSetFreezeId=None):
     query ="select Name, FullName from TissueProbeSetFreeze where Id=%s" % TissueProbeSetFreezeId
     try:
         cursor.execute(query)
         result = cursor.fetchone()
-        DatasetName = result[0]
-        DatasetFullName =result[1]
+        DataSetName = result[0]
+        DataSetFullName =result[1]
     except:
-        DatasetName =None
-        DatasetFullName =None
+        DataSetName =None
+        DataSetFullName =None
 
-    return DatasetName, DatasetFullName
+    return DataSetName, DataSetFullName
 
 ###########################################################################
 # input: cursor, geneIdLst (list)
diff --git a/wqflask/wqflask/correlation/CorrelationPage.py b/wqflask/wqflask/correlation/CorrelationPage.py
index e48ea412..8af30d1e 100644
--- a/wqflask/wqflask/correlation/CorrelationPage.py
+++ b/wqflask/wqflask/correlation/CorrelationPage.py
@@ -47,7 +47,7 @@ from base import webqtlConfig
 from utility.THCell import THCell
 from utility.TDCell import TDCell
 from base.webqtlTrait import webqtlTrait
-from base.webqtlDataset import webqtlDataset
+from base.data_set import create_dataset
 from base.templatePage import templatePage
 from utility import webqtlUtil
 from dbFunction import webqtlDatabaseFunction
@@ -310,7 +310,7 @@ class CorrelationPage(templatePage):
 
         #try:
         #print("target_db_name is:", target_db_name)
-        self.db = webqtlDataset(self.target_db_name, self.cursor)
+        self.db = create_dataset(self.db_conn, self.target_db_name)
         #except:
         #    detail = ["The database you just requested has not been established yet."]
         #    self.error(detail)
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index e2bafb3a..73a72e00 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -147,7 +147,7 @@ class GenotypeSearch(DoSearch):
                 """WHERE %s and
                     Geno.Id = GenoXRef.GenoId and
                     GenoXRef.GenoFreezeId = GenoFreeze.Id and
-                    GenoFreeze.Id = %s"""% (
+                    GenoFreeze.Id = %s""" % (
                         self.get_where_clause(),
                         self.escape(self.dataset.id)))
 
@@ -257,7 +257,7 @@ class GoSearch(ProbeSetSearch):
         statements = ("""%s.symbol=GOgene_product.symbol and
            GOassociation.gene_product_id=GOgene_product.id and
            GOterm.id=GOassociation.term_id""" % (
-            self.db_conn.escape_string(self.dataset.type)))
+            self.escape(self.dataset.type)))
 
         where_clause = " %s = '%s' and %s " % (field, go_id, statements)
 
@@ -317,14 +317,14 @@ class CisLrsSearch(ProbeSetSearch):
                 Geno.SpeciesId = %s and
                 %s.Chr = Geno.Chr and
                 ABS(%s.Mb-Geno.Mb) < %s """ % (
-                    self.dataset.type,
+                    self.escape(self.dataset.type),
                     min(lower_limit, upper_limit),
-                    self.dataset.type,
+                    self.escape(self.dataset.type),
                     max(lower_limit, upper_limit),
-                    self.dataset.type,
+                    self.escape(self.dataset.type),
                     self.species_id,
-                    self.dataset.type,
-                    self.dataset.type,
+                    self.escape(self.dataset.type),
+                    self.escape(self.dataset.type),
                     min_threshold
                     )
         else:
@@ -437,7 +437,7 @@ if __name__ == "__main__":
 
 
     from base import webqtlConfig
-    from base.webqtlDataset import webqtlDataset
+    from base.data_set import create_dataset
     from base.templatePage import templatePage
     from utility import webqtlUtil
     from dbFunction import webqtlDatabaseFunction
@@ -449,13 +449,13 @@ if __name__ == "__main__":
     cursor = db_conn.cursor()
 
     dataset_name = "HC_M2_0606_P"
-    dataset = webqtlDataset(dataset_name, cursor)
+    dataset = create_dataset(db_conn, dataset_name)
 
     #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
     #results = RifSearch("diabetes", dataset, cursor, db_conn).run()
     #results = WikiSearch("nicotine", dataset, cursor, db_conn).run()
-    results = TransLrsSearch(['25','99','10'], dataset, cursor, db_conn).run()
-    #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run()
+    results = CisLrsSearch(['25','99','10'], dataset, cursor, db_conn).run()
+    #results = TransLrsSearch(['25', '999', '10'], dataset, cursor, db_conn).run()
     #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()
     #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run()
     #results = GoSearch("0045202", dataset, cursor, db_conn).run()
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 05f062fc..b50e45d5 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -10,7 +10,7 @@ from flask import render_template
 #                                                 #
 ###################################################
 
-import string
+#import string
 import os
 import cPickle
 import re
@@ -29,7 +29,7 @@ from htmlgen import HTMLgen2 as HT
 from base import webqtlConfig
 from utility.THCell import THCell
 from utility.TDCell import TDCell
-from base.webqtlDataset import webqtlDataset
+from base.data_set import create_dataset
 from base.webqtlTrait import webqtlTrait
 from base.templatePage import templatePage
 from wqflask import parser
@@ -43,14 +43,13 @@ from utility import formatting
 
 
 class SearchResultPage(templatePage):
+    #maxReturn = 3000
 
-    maxReturn = 3000
-    nkeywords = 0
 
     def __init__(self, fd):
         print("initing SearchResultPage")
-        import logging_tree
-        logging_tree.printout()
+        #import logging_tree
+        #logging_tree.printout()
         self.fd = fd
         templatePage.__init__(self, fd)
         assert self.openMysql(), "Couldn't open MySQL"
@@ -59,127 +58,40 @@ class SearchResultPage(templatePage):
         self.dataset = fd['dataset']
 
         # change back to self.dataset
-        if not self.dataset or self.dataset == 'spacer':
-            #Error, No dataset selected
-            heading = "Search Result"
-            detail = ['''No dataset was selected for this search, please
-                go back and SELECT at least one dataset.''']
-            self.error(heading=heading,detail=detail,error="No dataset Selected")
-            return
+        #if not self.dataset or self.dataset == 'spacer':
+        #    #Error, No dataset selected
+        #    heading = "Search Result"
+        #    detail = ['''No dataset was selected for this search, please
+        #        go back and SELECT at least one dataset.''']
+        #    self.error(heading=heading,detail=detail,error="No dataset Selected")
+        #    return
 
         ###########################################
         #   Names and IDs of RISet / F2 set
         ###########################################
+        
+        # All Phenotypes is a special case we'll deal with later
         if self.dataset == "All Phenotypes":
             self.cursor.execute("""
                 select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
                 InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
                 PublishFreeze.InbredSetId""")
             results = self.cursor.fetchall()
-            self.dataset = map(lambda x: webqtlDataset(x[0], self.cursor), results)
+            self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
             self.dataset_groups = map(lambda x: x[1], results)
             self.dataset_group_ids = map(lambda x: x[2], results)
-            self.single_group = False
         else:
             print("self.dataset is:", pf(self.dataset))
-            self.dataset = webqtlDataset(self.dataset, self.cursor)
+            self.dataset = create_dataset(self.db_conn, self.dataset)
             print("self.dataset is now:", pf(self.dataset))
-            if self.dataset.type in ("Geno", "ProbeSet"):
-                db_type = self.dataset.type + "Freeze"
-                print("db_type [%s]: %s" % (type(db_type), db_type))
-
-                query = '''SELECT Id, Name, FullName, confidentiality,
-                                    AuthorisedUsers FROM %s WHERE Name = %%s''' % (db_type)
-
-                self.cursor.execute(query, self.dataset.name)
-
-                (indId,
-                 indName,
-                 indFullName,
-                 confidential,
-                 AuthorisedUsers) = self.cursor.fetchall()[0]
-
-                if confidential:
-                    # Allow confidential data later
-                    NoConfindetialDataForYouTodaySorry
-                    #access_to_confidential_dataset = 0
-                    #
-                    ##for the dataset that confidentiality is 1
-                    ##1. 'admin' and 'root' can see all of the dataset
-                    ##2. 'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table)
-                    #if webqtlConfig.USERDICT[self.privilege] > webqtlConfig.USERDICT['user']:
-                    #    access_to_confidential_dataset = 1
-                    #else:
-                    #    AuthorisedUsersList=AuthorisedUsers.split(',')
-                    #    if AuthorisedUsersList.__contains__(self.userName):
-                    #        access_to_confidential_dataset = 1
-                    #
-                    #if not access_to_confidential_dataset:
-                    #    Some error
-
-            #else:
-            #    heading = "Search Result"
-            #    detail = ['''The dataset has not been established yet, please
-            #        go back and SELECT at least one dataset.''']
-            #    self.error(heading=heading,detail=detail,error="No dataset Selected")
-            #    return
-
-            self.dataset.get_group()
-            self.single_group = True
-            #XZ, August 24,2010: Since self.single_group = True, it's safe to assign one species Id.
-            self.species_id = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor,
-                                                                       self.dataset.group)
-
-        #self.db_type = self.dataset.type
-        if self.dataset.type == "Publish":
-            self.search_fields = ['Phenotype.Post_publication_description',
-                                'Phenotype.Pre_publication_description',
-                                'Phenotype.Pre_publication_abbreviation',
-                                'Phenotype.Post_publication_abbreviation',
-                                'Phenotype.Lab_code',
-                                'Publication.PubMed_ID',
-                                'Publication.Abstract',
-                                'Publication.Title',
-                                'Publication.Authors',
-                                'PublishXRef.Id']
-            self.header_fields = ['',
-                                'ID',
-                                'Description',
-                                'Authors',
-                                'Year',
-                                'Max LRS',
-                                'Max LRS Location']            
-
-        elif self.dataset.type == "ProbeSet":
-            self.search_fields = ['Name',
-                                'Description',
-                                'Probe_Target_Description',
-                                'Symbol',
-                                'Alias',
-                                'GenbankId',
-                                'UniGeneId',
-                                'RefSeq_TranscriptId']
-            self.header_fields = ['',
-                                'ID',
-                                'Symbol',
-                                'Description',
-                                'Location',
-                                'Mean Expr',
-                                'Max LRS',
-                                'Max LRS Location']
-        elif self.dataset.type == "Geno":
-            self.search_fields = ['Name',
-                                  'Chr']
-            self.header_fields = ['',
-                                'ID',
-                                'Location']
-
+ 
         self.search()
         self.gen_search_result()
 
 
     def gen_search_result(self):
-        """Get the info displayed in the search result table from the set of results computed in
+        """
+        Get the info displayed in the search result table from the set of results computed in
         the "search" function
         
         """
@@ -191,26 +103,19 @@ class SearchResultPage(templatePage):
             if not result:
                 continue
 
-            seq = 1
             group = self.dataset.group
-
             species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, RISet=group)
 
             #### Excel file needs to be generated ####
 
             print("foo locals are:", locals())
             trait_id = result[0]
-            this_trait = webqtlTrait(db=self.dataset, name=trait_id, cursor=self.cursor)
+            this_trait = webqtlTrait(self.db_conn, db=self.dataset, name=trait_id)
             this_trait.retrieveInfo(QTL=True)
             print("this_trait is:", pf(this_trait))
             self.trait_list.append(this_trait)
-
-        if self.dataset.type == "ProbeSet":
-            self.getTraitInfoForProbeSet(trait_list=self.trait_list, species=species)            
-        elif self.dataset.type == "Publish":
-            self.getTraitInfoForPublish(trait_list=self.trait_list, species=species)
-        elif self.dataset.type == "Geno":
-            self.getTraitInfoForGeno(trait_list=self.trait_list)
+            
+        self.dataset.get_trait_info(self.trait_list, species)    
 
 
     def search(self):
@@ -222,7 +127,7 @@ class SearchResultPage(templatePage):
             print("[kodak] item is:", pf(a_search))
             search_term = a_search['search_term']
             if a_search['key']:
-                search_type = string.upper(a_search['key'])
+                search_type = a_search['key'].upper()
             else:
                 # We fall back to the dataset type as the key to get the right object
                 search_type = self.dataset.type
@@ -258,187 +163,3 @@ class SearchResultPage(templatePage):
             keyword = string.replace(keyword,"?",".")
             wildcardkeyword[i] = keyword#'[[:<:]]'+ keyword+'[[:>:]]'
         return wildcardkeyword
-    
-
-    def getTraitInfoForGeno(self, trait_list):
-        for this_trait in trait_list:
-            if not this_trait.haveinfo:
-                this_trait.retrieveInfo()
-
-            #XZ: trait_location_value is used for sorting
-            trait_location_repr = 'N/A'
-            trait_location_value = 1000000
-
-            if this_trait.chr and this_trait.mb:
-                try:
-                    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
-                except:
-                    if this_trait.chr.upper() == 'X':
-                        trait_location_value = 20*1000 + this_trait.mb
-                    else:
-                        trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
-
-                this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
-                this_trait.location_value = trait_location_value
-
-
-    def getTraitInfoForPublish(self, trait_list, species = ''):
-        for this_trait in trait_list:
-            if not this_trait.haveinfo:
-                this_trait.retrieveInfo(QTL=1)
-
-            description = this_trait.post_publication_description
-            if this_trait.confidential:
-                if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
-                    description = this_trait.pre_publication_description
-            this_trait.description_display = description
-
-            if not this_trait.year.isdigit():
-                this_trait.pubmed_text = "N/A"
-
-            if this_trait.pubmed_id:
-                this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id
-
-            #LRS and its location
-            this_trait.LRS_score_repr = "N/A"
-            this_trait.LRS_score_value = 0
-            this_trait.LRS_location_repr = "N/A"
-            this_trait.LRS_location_value = 1000000
-
-            if this_trait.lrs:
-                self.cursor.execute("""
-                    select Geno.Chr, Geno.Mb from Geno, Species
-                    where Species.Name = '%s' and
-                        Geno.Name = '%s' and
-                        Geno.SpeciesId = Species.Id
-                """ % (species, this_trait.locus))
-                result = self.cursor.fetchone()
-
-                if result:
-                    if result[0] and result[1]:
-                        LRS_Chr = result[0]
-                        LRS_Mb = result[1]
-
-                        #XZ: LRS_location_value is used for sorting
-                        try:
-                            LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
-                        except:
-                            if LRS_Chr.upper() == 'X':
-                                LRS_location_value = 20*1000 + float(LRS_Mb)
-                            else:
-                                LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
-
-                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
-                        this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
-                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
-
-
-    def getTraitInfoForProbeSet(self, trait_list=None, species=''):
-
-        #  Note: setting trait_list to [] is probably not a great idea.
-        if not trait_list:
-            trait_list = []
-
-        for this_trait in trait_list:
-
-            if not this_trait.haveinfo:
-                this_trait.retrieveInfo(QTL=1)
-
-            if this_trait.symbol:
-                pass
-            else:
-                this_trait.symbol = "N/A"
-
-            #XZ, 12/08/2008: description
-            #XZ, 06/05/2009: Rob asked to add probe target description
-            description_string = str(this_trait.description).strip()
-            target_string = str(this_trait.probe_target_description).strip()
-
-            description_display = ''
-
-            if len(description_string) > 1 and description_string != 'None':
-                description_display = description_string
-            else:
-                description_display = this_trait.symbol
-
-            if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None':
-                description_display = description_display + '; ' + target_string.strip()
-
-            # Save it for the jinja2 tablet
-            this_trait.description_display = description_display
-
-            #XZ: trait_location_value is used for sorting
-            trait_location_repr = 'N/A'
-            trait_location_value = 1000000
-
-            if this_trait.chr and this_trait.mb:
-                try:
-                    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
-                except:
-                    if this_trait.chr.upper() == 'X':
-                        trait_location_value = 20*1000 + this_trait.mb
-                    else:
-                        trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
-
-                this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) )
-                this_trait.location_value = trait_location_value
-                #this_trait.trait_location_value = trait_location_value
-
-            #XZ, 01/12/08: This SQL query is much faster.
-            query = (
-"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
-    where ProbeSetXRef.ProbeSetFreezeId = %s and
-    ProbeSet.Id = ProbeSetXRef.ProbeSetId and
-    ProbeSet.Name = '%s'
-            """ % (self.db_conn.escape_string(str(this_trait.db.id)),
-                   self.db_conn.escape_string(this_trait.name)))
-
-            print("query is:", pf(query))
-            
-            self.cursor.execute(query)
-            result = self.cursor.fetchone()
-
-            if result:
-                if result[0]:
-                    mean = result[0]
-                else:
-                    mean=0
-            else:
-                mean = 0
-
-            #XZ, 06/05/2009: It is neccessary to turn on nowrap
-            this_trait.mean = repr = "%2.3f" % mean
-
-            #LRS and its location
-            this_trait.LRS_score_repr = 'N/A'
-            this_trait.LRS_score_value = 0
-            this_trait.LRS_location_repr = 'N/A'
-            this_trait.LRS_location_value = 1000000
-
-            #Max LRS and its Locus location
-            if this_trait.lrs and this_trait.locus:
-                self.cursor.execute("""
-                    select Geno.Chr, Geno.Mb from Geno, Species
-                    where Species.Name = '%s' and
-                        Geno.Name = '%s' and
-                        Geno.SpeciesId = Species.Id
-                """ % (species, this_trait.locus))
-                result = self.cursor.fetchone()
-
-                if result:
-                    if result[0] and result[1]:
-                        LRS_Chr = result[0]
-                        LRS_Mb = result[1]
-
-                        #XZ: LRS_location_value is used for sorting
-                        try:
-                            LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
-                        except:
-                            if LRS_Chr.upper() == 'X':
-                                LRS_location_value = 20*1000 + float(LRS_Mb)
-                            else:
-                                LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
-
-                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
-                        this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
-                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )