From a7cc1119ebfbfab3ba5260be75c87cd4496f09b7 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Wed, 5 Dec 2012 18:03:23 -0600
Subject: Renamed webqtlTrait.py to trait.py Renamed webqtlTrait class to
 GeneralTrait

Began process of removing fd from show_trait.py

Created DatasetGroup object in data_set.py (this may end up becoming
its own file later if it becomes big enough)
---
 misc/notes.txt                           |  10 +-
 misc/todo.txt                            |   4 +-
 wqflask/base/data_set.py                 | 221 +++++++++-
 wqflask/base/trait.py                    | 708 +++++++++++++++++++++++++++++++
 wqflask/base/webqtlTrait.py              | 695 ------------------------------
 wqflask/wqflask/do_search.py             |   5 +-
 wqflask/wqflask/search_results.py        |   5 +-
 wqflask/wqflask/show_trait/show_trait.py | 295 ++++++++-----
 8 files changed, 1129 insertions(+), 814 deletions(-)
 create mode 100755 wqflask/base/trait.py
 delete mode 100755 wqflask/base/webqtlTrait.py

diff --git a/misc/notes.txt b/misc/notes.txt
index 59ab79cb..b0c0762c 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -14,6 +14,9 @@ export TERM=screen
 To search for commands in history if necessary:
 history | grep "(whatever is being searched for)"
 
+Run web server:
+/usr/local/nginx/sbin/nginx
+
 Run server:
 python runserver.py
 
@@ -63,11 +66,16 @@ Classes should always inherit "object"
 htop: Gives information on processes, cpu/memory load, etc
 dstat: Also gives various system information, resource usage, etc
 df: Reports file system disk space usage
-
+
 ===========================================
 
 tidyp - Improves/beautifies html code
 tidyp -m -i -w 100 index_page.html
 
+===========================================
+
+ps -ax - View processes
+
+kill (process #)
 
 
diff --git a/misc/todo.txt b/misc/todo.txt
index 609e053f..60655a71 100644
--- a/misc/todo.txt
+++ b/misc/todo.txt
@@ -1 +1,3 @@
-- Read about grep/locate/find
\ No newline at end of file
+- Check about using trait id instead of trait name in queries in data_set.py
+
+- Ask Rob about Probe/cellid traits
\ No newline at end of file
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 70b33014..68f5e5ed 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -21,12 +21,16 @@
 # This module is used by GeneNetwork project (www.genenetwork.org)
 
 from __future__ import print_function, division
+import os
 
 from flask import Flask, g
 
 from htmlgen import HTMLgen2 as HT
 
+import reaper
+
 import webqtlConfig
+from utility import webqtlUtil
 
 from MySQLdb import escape_string as escape
 from pprint import pformat as pf
@@ -57,6 +61,74 @@ def create_dataset(dataset_name):
     return dataset_class(dataset_name)
 
 
+class DatasetGroup(object):
+    """
+    Each group has multiple datasets; each species has multiple groups.
+
+    For example, Mouse has multiple groups (BXD, BXA, etc), and each group
+    has multiple datasets associated with it.
+
+    """
+    def __init__(self, dataset):
+        """Set self.name and self.group_id from the dataset's group query."""
+        # PhenotypeDataSet now calls its group query query_for_group; fall back to query
+        group_query = getattr(dataset, 'query_for_group', None) or dataset.query
+        self.name, self.group_id = g.db.execute(group_query).fetchone()
+        if self.name == 'BXD300':
+            self.name = "BXD"
+        self.incparentsf1 = False
+
+    #def read_genotype(self):
+    #    self.read_genotype_file()
+    #
+    #    if not self.genotype:   # Didn't succeed, so we try method 2
+    #        self.read_genotype_data()
+
+    def read_genotype_file(self):
+        """Read genotypes from the group's .geno file instead of the database.
+
+        Sets genotype_1, genotype_2, genotype, samplelist, f1list and parlist
+        on self; incparentsf1 is reset to False whenever genotype_1 is chosen.
+        """
+
+        #genotype_1 is Dataset Object without parents and f1
+        #genotype_2 is Dataset Object with parents and f1 (not for intercross)
+
+        self.genotype_1 = reaper.Dataset()
+
+        # reaper barfs on unicode filenames, so here we ensure it's a string
+        full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
+        self.genotype_1.read(full_filename)
+
+        try:
+            # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py;
+            _f1, _f12, _mat, _pat = webqtlUtil.ParInfo[self.name]
+        except KeyError:
+            _f1 = _f12 = _mat = _pat = None
+
+        # NOTE(review): "group" looks like a leftover of the riset->group rename; qtlreaper
+        # presumably reports "riset" or "intercross" here -- TODO confirm
+        self.genotype_2 = self.genotype_1
+        if self.genotype_1.type == "group" and _mat and _pat:
+            self.genotype_2 = self.genotype_1.add(Mat=_mat, Pat=_pat)       #, F1=_f1)
+
+        #determine default genotype object
+        if self.incparentsf1 and self.genotype_1.type != "intercross":
+            self.genotype = self.genotype_2
+        else:
+            self.incparentsf1 = False
+            self.genotype = self.genotype_1
+
+        self.samplelist = list(self.genotype.prgy)
+        self.f1list = []
+        self.parlist = []
+
+        if _f1 and _f12:
+            self.f1list = [_f1, _f12]
+        if _mat and _pat:
+            self.parlist = [_mat, _pat]
+
+
 class DataSet(object):
     """
     DataSet class defines a dataset in webqtl, can be either Microarray,
@@ -70,27 +142,35 @@ class DataSet(object):
         self.name = name
         self.id = None
         self.type = None
-        self.group = None
 
         self.setup()
 
         self.check_confidentiality()
 
         self.retrieve_name()
-        self.get_group()
+        self.group = DatasetGroup(self)   # sets self.group and self.group_id
+       
+        
+    def get_desc(self):
+        """Gets overridden later, at least for Temp...used by trait's get_given_name"""
+        return None
 
 
     # Delete this eventually
     @property
     def riset():
         Weve_Renamed_This_As_Group
+        
+        
+    #@property
+    #def group(self):
+    #    if not self._group:
+    #        self.get_group()
+    #        
+    #    return self._group
+    
 
 
-    def get_group(self):
-        self.group, self.group_id = g.db.execute(self.query).fetchone()
-        if self.group == 'BXD300':
-            self.group = "BXD"
-        #return group
 
 
     def retrieve_name(self):
@@ -176,7 +256,7 @@ class PhenotypeDataSet(DataSet):
 
         self.type = 'Publish'
 
-        self.query = '''
+        self.query_for_group = '''
                             SELECT
                                     InbredSet.Name, InbredSet.Id
                             FROM
@@ -239,7 +319,29 @@ class PhenotypeDataSet(DataSet):
 
                         this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
                         this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
-                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
+                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb))
+                        
+    def retrieve_sample_data(self, trait):
+        """Return (strain name, value, SE, N, data id) rows for the given published trait."""
+        query = """
+                    SELECT
+                            Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
+                    FROM
+                            (PublishData, Strain, PublishXRef, PublishFreeze)
+                    left join PublishSE on
+                            (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+                    left join NStrain on
+                            (NStrain.DataId = PublishData.Id AND
+                            NStrain.StrainId = PublishData.StrainId)
+                    WHERE
+                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                            PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
+                            PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+                    Order BY
+                            Strain.Name
+                    """ % (trait.name, self.id)
+        return g.db.execute(query).fetchall()
+
 
 class GenotypeDataSet(DataSet):
     DS_NAME_MAP['Geno'] = 'GenotypeDataSet'
@@ -297,6 +399,26 @@ class GenotypeDataSet(DataSet):
 
                 this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
                 this_trait.location_value = trait_location_value
+                
+    def retrieve_sample_data(self, trait):
+        """Return (strain name, value, SE, data id) rows for the given marker in this dataset."""
+        query = """
+                    SELECT
+                            Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
+                    FROM
+                            (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
+                    left join GenoSE on
+                            (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+                    WHERE
+                            Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+                            GenoFreeze.Name = '%s' AND
+                            GenoXRef.DataId = GenoData.Id AND
+                            GenoData.StrainId = Strain.Id
+                    Order BY
+                            Strain.Name
+                    """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), escape(trait.name), escape(self.name))
+        return g.db.execute(query).fetchall()
 
 
 class MrnaAssayDataSet(DataSet):
@@ -476,6 +598,42 @@ class MrnaAssayDataSet(DataSet):
                         this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
                         this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
                         this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
+                        
+    def get_sequence(self):
+        """Return the BlatSeq of the probe set matched by name within the named dataset."""
+        # NOTE(review): self is a DataSet here (self.name is the dataset name, no self.dataset is set in view) -- confirm the name source
+        query = """
+                    SELECT
+                            ProbeSet.BlatSeq
+                    FROM
+                            ProbeSet, ProbeSetFreeze, ProbeSetXRef
+                    WHERE
+                            ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+                            ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
+                            ProbeSet.Name = '%s' and
+                            ProbeSetFreeze.Name = '%s'
+                """ % (escape(self.name), escape(self.dataset.name))
+        return g.db.execute(query).fetchone()[0]
+    
+    def retrieve_sample_data(self, trait):
+        """Return (strain name, value, SE, data id) rows for the given probe set in this dataset."""
+        query = """
+                    SELECT
+                            Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+                    FROM
+                            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                    left join ProbeSetSE on
+                            (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                    WHERE
+                            ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetFreeze.Name = '%s' AND
+                            ProbeSetXRef.DataId = ProbeSetData.Id AND
+                            ProbeSetData.StrainId = Strain.Id
+                    Order BY
+                            Strain.Name
+                    """ % (escape(trait.name), escape(self.name))
+        return g.db.execute(query).fetchall()
 
 
 class TempDataSet(DataSet):
@@ -497,6 +655,51 @@ class TempDataSet(DataSet):
         self.id = 1
         self.fullname = 'Temporary Storage'
         self.shortname = 'Temp'
+        
+       
+    @staticmethod
+    def handle_pca(desc):
+        """Reduce a Temp trait description to the part shown as its name."""
+        if 'PCA' in desc:
+            # PCA descriptions keep only the text after the last colon
+            return desc.rpartition(':')[2].strip()
+        # otherwise keep the text before 'entered' (all of it when absent)
+        return desc.partition('entered')[0].strip()
+        
+    def get_desc(self):
+        """Return the description stored for this Temp entry, trimmed by handle_pca."""
+        query = 'SELECT description FROM Temp WHERE Name=%s'
+        desc = g.db.execute(query, self.name).fetchone()[0]
+        return self.handle_pca(desc)
+        
+    def get_group(self):
+        """Set self.group to the InbredSet name (not a DatasetGroup -- TODO confirm) and self.group_id."""
+        query = """
+                    SELECT
+                            InbredSet.Name, InbredSet.Id
+                    FROM
+                            InbredSet, Temp
+                    WHERE
+                            Temp.InbredSetId = InbredSet.Id AND
+                            Temp.Name = %s
+            """
+        self.group, self.group_id = g.db.execute(query, self.name).fetchone()
+        
+    def retrieve_sample_data(self, trait):
+        """Return (strain name, value, SE, N, data id) rows for the given Temp trait."""
+        query = """
+                SELECT
+                        Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
+                FROM
+                        TempData, Temp, Strain
+                WHERE
+                        TempData.StrainId = Strain.Id AND
+                        TempData.Id = Temp.DataId AND
+                        Temp.name = '%s'
+                Order BY
+                        Strain.Name
+                """ % escape(trait.name)
+        return g.db.execute(query).fetchall()
 
 
 def geno_mrna_confidentiality(ob):
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
new file mode 100755
index 00000000..d3753fc1
--- /dev/null
+++ b/wqflask/base/trait.py
@@ -0,0 +1,708 @@
+from __future__ import division, print_function
+
+import string
+
+from htmlgen import HTMLgen2 as HT
+
+import webqtlConfig
+from webqtlCaseData import webqtlCaseData
+from data_set import create_dataset
+from dbFunction import webqtlDatabaseFunction
+from utility import webqtlUtil
+
+from MySQLdb import escape_string as escape
+from pprint import pformat as pf
+
+from flask import Flask, g
+
+class GeneralTrait:
+    """
+    Trait class defines a trait in webqtl, can be either Microarray,
+    Published phenotype, genotype, or user input trait
+
+    """
+
+    def __init__(self, **kw):
+        """Build a trait from keyword arguments; fullname is "dataset::name[::cellid]"."""
+        print("in GeneralTrait")
+        self.dataset = kw.get('dataset', None)                  # database object
+        self.name = kw.get('name', None)                 # Trait ID, ProbeSet ID, Published ID, etc.
+        self.cellid = kw.get('cellid', None)
+        self.identification = kw.get('identification', 'un-named trait')
+        self.haveinfo = kw.get('haveinfo', False)
+        self.sequence = kw.get('sequence', None)              # Blat sequence, available for ProbeSet
+        self.data = kw.get('data', {})
+
+        fullname = kw.get('fullname')
+        if fullname:
+            name2 = fullname.split("::")
+            if len(name2) == 2:
+                self.dataset, self.name = name2
+            elif len(name2) == 3:
+                self.dataset, self.name, self.cellid = name2
+
+        #if self.dataset and isinstance(self.dataset, basestring):
+        self.dataset = create_dataset(self.dataset)
+        print("self.dataset is:", self.dataset, type(self.dataset))
+        #if self.dataset:
+        
+        #self.dataset.get_group()
+        
+        #if self.dataset.type == "Temp":
+        #    self.cursor.execute('''
+        #            SELECT
+        #                    InbredSet.Name
+        #            FROM
+        #                    InbredSet, Temp
+        #            WHERE
+        #                    Temp.InbredSetId = InbredSet.Id AND
+        #                    Temp.Name = "%s"
+        #    ''', self.name)
+        #    self.group = self.cursor.fetchone()[0]
+        #else:
+        #    self.group = self.dataset.get_group()
+
+        #print("trinity, self.group is:", self.group)
+
+        #
+        # In ProbeSet, there are maybe several annotations match one sequence
+        # so we need use sequence(BlatSeq) as the identification, when we update
+        # one annotation, we update the others who match the sequence also.
+        #
+        # Hongqiang Li, 3/3/2008
+        #
+
+        #XZ, 05/08/2009: This block is not neccessary. We can add 'BlatSeq' into disfield.
+        # The variable self.sequence should be changed to self.BlatSeq
+        # It also should be changed in other places where it are used.
+
+        #if self.dataset:
+        #if self.dataset.type == 'ProbeSet':
+        #    print("Doing ProbeSet Query")
+        #    query = '''
+        #            SELECT
+        #                    ProbeSet.BlatSeq
+        #            FROM
+        #                    ProbeSet, ProbeSetFreeze, ProbeSetXRef
+        #            WHERE
+        #                    ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+        #                    ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
+        #                    ProbeSet.Name = %s and
+        #                    ProbeSetFreeze.Name = %s
+        #    ''', (self.name, self.dataset.name)
+        #    print("query is:", query)
+        #    self.sequence = g.db.execute(*query).fetchone()[0]
+        #    #self.sequence = self.cursor.fetchone()[0]
+        #    print("self.sequence is:", self.sequence)
+
+
+    def get_name(self):
+        stringy = ""
+        if self.dataset and self.name:
+            stringy = "%s::%s" % (self.dataset, self.name)
+            if self.cellid:
+                stringy += "::" + self.cellid
+        else:
+            stringy = self.description
+        return stringy
+
+
+    def get_given_name(self):
+        """    
+         when user enter a trait or GN generate a trait, user want show the name
+         not the name that generated by GN randomly, the two follow function are
+         used to give the real name and the database. displayName() will show the
+         database also, getGivenName() just show the name.
+         For other trait, displayName() as same as getName(), getGivenName() as
+         same as self.name
+        
+         Hongqiang 11/29/07
+         
+        """
+        stringy = self.name
+        if self.dataset and self.name:
+            desc = self.dataset.get_desc()  
+            if desc:
+                #desc = self.handle_pca(desc)
+                stringy = desc
+        return stringy
+    
+
+
+    def display_name(self):
+        stringy = ""
+        if self.dataset and self.name:
+            desc = self.dataset.get_desc()
+            #desc = self.handle_pca(desc)
+            if desc:
+                #desc = self.handle_pca(desc)
+                #stringy = desc
+                #if desc.__contains__('PCA'):
+                #    desc = desc[desc.rindex(':')+1:].strip()
+                #else:
+                #    desc = desc[:desc.index('entered')].strip()
+                #desc = self.handle_pca(desc)
+                stringy = "%s::%s" % (self.dataset, desc)
+            else:
+                stringy = "%s::%s" % (self.dataset, self.name)
+                if self.cellid:
+                    stringy += "::" + self.cellid
+        else:
+            stringy = self.description
+
+        return stringy
+
+
+    #def __str__(self):
+    #       #return "%s %s" % (self.getName(), self.group)
+    #       return self.getName()
+    #__str__ = getName
+    #__repr__ = __str__
+
+    def export_data(self, samplelist, the_type="val"):
+        """
+        export data according to samplelist
+        mostly used in calculating correlation
+
+        the_type picks the attribute read from each case ('val', 'var' or 'N');
+        samples missing from self.data give None.  An unknown the_type raises
+        KeyError once a sample present in self.data is reached.
+        """
+        result = []
+        for sample in samplelist:
+            # one entry is appended per requested sample, in samplelist order
+            if sample not in self.data:
+                result.append(None)
+            elif the_type in ('val', 'var', 'N'):
+                result.append(getattr(self.data[sample], the_type))
+            else:
+                # same message the Python 2 "raise KeyError, ..." form produced
+                raise KeyError(repr(the_type) + ' the_type is incorrect.')
+        return result
+
+    def export_informative(self, incVar=0):
+        """
+        Return (samples, vals, the_vars) for the samples whose value is set;
+        with incVar, samples lacking a variance are skipped too.
+        mostly used in qtl regression
+        """
+        samples = []
+        vals = []
+        the_vars = []
+        for sample, value in self.data.items():
+            if value.val is None or (incVar and value.var is None):
+                continue
+            samples.append(sample)
+            vals.append(value.val)
+            the_vars.append(value.var)
+        return samples, vals, the_vars
+
+
+    #
+    # In ProbeSet, there are maybe several annotations match one sequence
+    # so we need use sequence(BlatSeq) as the identification, when we update
+    # one annotation, we update the others who match the sequence also.
+    #
+    # Hongqiang Li, 3/3/2008
+    #
+    #def getSequence(self):
+    #    assert self.cursor
+    #    if self.dataset.type == 'ProbeSet':
+    #        self.cursor.execute('''
+    #                        SELECT
+    #                                ProbeSet.BlatSeq
+    #                        FROM
+    #                                ProbeSet, ProbeSetFreeze, ProbeSetXRef
+    #                        WHERE
+    #                                ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+    #                                ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
+    #                                ProbeSet.Name = %s
+    #                                ProbeSetFreeze.Name = %s
+    #                ''', self.name, self.dataset.name)
+    #        #self.cursor.execute(query)
+    #        results = self.fetchone()
+    #
+    #        return results[0]
+
+
+
+    def retrieve_sample_data(self, samplelist=None):
+        """Refill self.data with webqtlCaseData built from the dataset's rows, optionally limited to samplelist."""
+        if samplelist is None:
+            samplelist = []
+        assert self.dataset
+
+        #if self.cellid:
+        #     #Probe Data
+        #    query = '''
+        #            SELECT
+        #                    Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id
+        #            FROM
+        #                    (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef,
+        #                    Strain, Probe, ProbeSet)
+        #            left join ProbeSE on
+        #                    (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
+        #            WHERE
+        #                    Probe.Name = '%s' AND ProbeSet.Name = '%s' AND
+        #                    Probe.ProbeSetId = ProbeSet.Id AND
+        #                    ProbeXRef.ProbeId = Probe.Id AND
+        #                    ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND
+        #                    ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+        #                    ProbeSetFreeze.Name = '%s' AND
+        #                    ProbeXRef.DataId = ProbeData.Id AND
+        #                    ProbeData.StrainId = Strain.Id
+        #            Order BY
+        #                    Strain.Name
+        #            ''' % (self.cellid, self.name, self.dataset.name)
+        #            
+        #else:
+        results = self.dataset.retrieve_sample_data(self)
+
+        #if self.dataset.type == 'Temp':
+        #    query = '''
+        #            SELECT
+        #                    Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
+        #            FROM
+        #                    TempData, Temp, Strain
+        #            WHERE
+        #                    TempData.StrainId = Strain.Id AND
+        #                    TempData.Id = Temp.DataId AND
+        #                    Temp.name = '%s'
+        #            Order BY
+        #                    Strain.Name
+        #            ''' % self.name
+        ##XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE
+        #elif self.dataset.type == 'Publish':
+        #    query = '''
+        #            SELECT
+        #                    Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
+        #            FROM
+        #                    (PublishData, Strain, PublishXRef, PublishFreeze)
+        #            left join PublishSE on
+        #                    (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+        #            left join NStrain on
+        #                    (NStrain.DataId = PublishData.Id AND
+        #                    NStrain.StrainId = PublishData.StrainId)
+        #            WHERE
+        #                    PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+        #                    PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
+        #                    PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+        #            Order BY
+        #                    Strain.Name
+        #            ''' % (self.name, self.dataset.id)
+
+        #XZ, 03/02/2009: Xiaodong changed Data to ProbeData, SE to ProbeSE
+        #elif self.cellid:
+           
+        #XZ, 03/02/2009: Xiaodong added this block for ProbeSetData and ProbeSetSE
+        #elif self.dataset.type == 'ProbeSet':
+        #    #ProbeSet Data
+        #    query = '''
+        #            SELECT
+        #                    Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+        #            FROM
+        #                    (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+        #            left join ProbeSetSE on
+        #                    (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+        #            WHERE
+        #                    ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+        #                    ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+        #                    ProbeSetFreeze.Name = '%s' AND
+        #                    ProbeSetXRef.DataId = ProbeSetData.Id AND
+        #                    ProbeSetData.StrainId = Strain.Id
+        #            Order BY
+        #                    Strain.Name
+        #            ''' % (self.name, self.dataset.name)
+        ##XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE
+        #else:
+        #    #Geno Data
+        #    #XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search.
+        #    query = '''
+        #            SELECT
+        #                    Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
+        #            FROM
+        #                    (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
+        #            left join GenoSE on
+        #                    (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+        #            WHERE
+        #                    Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+        #                    GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+        #                    GenoFreeze.Name = '%s' AND
+        #                    GenoXRef.DataId = GenoData.Id AND
+        #                    GenoData.StrainId = Strain.Id
+        #            Order BY
+        #                    Strain.Name
+        #            ''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group), self.name, self.dataset.name)
+
+
+        #self.cursor.execute(query)
+        #results = self.cursor.fetchall()
+
+        # Todo: is this necessary? If not remove
+        self.data.clear()
+
+        if results:
+            #self.mysqlid = results[0][-1]
+            #if samplelist:
+            for item in results:
+                name = item[0]
+                if not samplelist or name in samplelist:
+                    #if value != None:
+                    #    num_cases = None
+                    #    if self.dataset.type in ('Publish', 'Temp'):
+                    #        ndata = item[3]
+                    # row columns are handed to webqtlCaseData unchanged
+                    self.data[name] = webqtlCaseData(*item)   #name, value, variance, num_cases)
+                #end for
+        #    else:
+        #        for item in results:
+        #            val = item[1]
+        #            if val != None:
+        #                var = item[2]
+        #                ndata = None
+        #                if self.dataset.type in ('Publish', 'Temp'):
+        #                    ndata = item[3]
+        #                self.data[item[0]] = webqtlCaseData(val, var, ndata)
+        #        #end for
+        #    #end if
+
+    #def keys(self):
+    #    return self.__dict__.keys()
+    #
+    #def has_key(self, key):
+    #    return self.__dict__.has_key(key)
+    #
+    #def items(self):
+    #    return self.__dict__.items()
+
+    def retrieve_info(self, QTL=False):
+        assert self.dataset, "Dataset doesn't exist"
+        if self.dataset.type == 'Publish':
+            query = """
+                    SELECT
+                            PublishXRef.Id, Publication.PubMed_ID,
+                            Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
+                            Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
+                            Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
+                            Publication.Authors, Publication.Title, Publication.Abstract,
+                            Publication.Journal, Publication.Volume, Publication.Pages,
+                            Publication.Month, Publication.Year, PublishXRef.Sequence,
+                            Phenotype.Units, PublishXRef.comments
+                    FROM
+                            PublishXRef, Publication, Phenotype, PublishFreeze
+                    WHERE
+                            PublishXRef.Id = %s AND
+                            Phenotype.Id = PublishXRef.PhenotypeId AND
+                            Publication.Id = PublishXRef.PublicationId AND
+                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                            PublishFreeze.Id = %s
+                    """ % (self.name, self.dataset.id)
+            traitInfo = g.db.execute(query).fetchone()
+        #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
+        #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
+        elif self.dataset.type == 'ProbeSet':
+            display_fields_string = ', ProbeSet.'.join(self.dataset.display_fields)
+            display_fields_string = 'ProbeSet.' + display_fields_string
+            query = """
+                    SELECT %s
+                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+                    WHERE
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetFreeze.Name = '%s' AND
+                            ProbeSet.Name = '%s'
+                    """ % (escape(display_fields_string),
+                           escape(self.dataset.name),
+                           escape(self.name))
+            traitInfo = g.db.execute(query).fetchone()
+            print("traitInfo is: ", pf(traitInfo))
+        #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
+        # to avoid the problem of same marker name from different species.
+        elif self.dataset.type == 'Geno':
+            display_fields_string = string.join(self.dataset.display_fields,',Geno.')
+            display_fields_string = 'Geno.' + display_fields_string
+            query = """
+                    SELECT %s
+                    FROM Geno, GenoFreeze, GenoXRef
+                    WHERE
+                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+                            GenoXRef.GenoId = Geno.Id AND
+                            GenoFreeze.Name = '%s' AND
+                            Geno.Name = '%s'
+                    """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name))
+            traitInfo = g.db.execute(query).fetchone()
+            print("traitInfo is: ", pf(traitInfo))
+        else: #Temp type
+            query = """SELECT %s FROM %s WHERE Name = %s
+                                     """ % (string.join(self.dataset.display_fields,','),
+                                            self.dataset.type, self.name)
+            traitInfo = g.db.execute(query).fetchone()
+
+
+        #self.cursor.execute(query)
+        #traitInfo = self.cursor.fetchone()
+        if traitInfo:
+            self.haveinfo = True
+
+            #XZ: assign SQL query result to trait attributes.
+            for i, field in enumerate(self.dataset.display_fields):
+                setattr(self, field, traitInfo[i])
+
+            if self.dataset.type == 'Publish':
+                self.confidential = 0
+                if self.pre_publication_description and not self.pubmed_id:
+                    self.confidential = 1
+
+            self.homologeneid = None
+            if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
+                #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
+                #XZ: So I have to test if geneid is number before execute the query.
+                #XZ: The geneid values in database should be cleaned up.
+                try:
+                    junk = float(self.geneid)
+                    geneidIsNumber = 1
+                except:
+                    geneidIsNumber = 0
+
+                if geneidIsNumber:
+                    query = """
+                            SELECT
+                                    HomologeneId
+                            FROM
+                                    Homologene, Species, InbredSet
+                            WHERE
+                                    Homologene.GeneId =%s AND
+                                    InbredSet.Name = '%s' AND
+                                    InbredSet.SpeciesId = Species.Id AND
+                                    Species.TaxonomyId = Homologene.TaxonomyId
+                            """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
+                    result = g.db.execute(query).fetchone()
+                else:
+                    result = None
+
+                if result:
+                    self.homologeneid = result[0]
+
+            if QTL:
+                if self.dataset.type == 'ProbeSet' and not self.cellid:
+                    traitQTL = g.db.execute("""
+                            SELECT
+                                    ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean
+                            FROM
+                                    ProbeSetXRef, ProbeSet
+                            WHERE
+                                    ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                                    ProbeSet.Name = "%s" AND
+                                    ProbeSetXRef.ProbeSetFreezeId =%s
+                            """, (self.name, self.dataset.id)).fetchone()
+                    #self.cursor.execute(query)
+                    #traitQTL = self.cursor.fetchone()
+                    if traitQTL:
+                        self.locus, self.lrs, self.pvalue, self.mean = traitQTL
+                    else:
+                        self.locus = self.lrs = self.pvalue = self.mean = ""
+                if self.dataset.type == 'Publish':
+                    traitQTL = g.db.execute("""
+                            SELECT
+                                    PublishXRef.Locus, PublishXRef.LRS
+                            FROM
+                                    PublishXRef, PublishFreeze
+                            WHERE
+                                    PublishXRef.Id = %s AND
+                                    PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                                    PublishFreeze.Id =%s
+                            """, (self.name, self.dataset.id)).fetchone()
+                    #self.cursor.execute(query)
+                    #traitQTL = self.cursor.fetchone()
+                    if traitQTL:
+                        self.locus, self.lrs = traitQTL
+                    else:
+                        self.locus = self.lrs = ""
+        else:
+            raise KeyError, `self.name`+' information is not found in the database.'
+
+    def genHTML(self, formName = "", dispFromDatabase=0, privilege="guest", userName="Guest", authorized_users=""):
+        """Return an HT.Span describing this trait, linked via showDatabase2/showDatabase3.
+
+        formName, when given, selects the showDatabase3 link; dispFromDatabase appends
+        the dataset's own link (non-Temp only); privilege, userName and authorized_users
+        decide whether a confidential Publish trait shows its post-publication text.
+        """
+        if not self.haveinfo:
+            self.retrieve_info()
+
+        if self.dataset.type == 'Publish':
+            if self.pubmed_id:
+                PubMedLink = HT.Href(text="PubMed %d : " % self.pubmed_id, target = "_blank", url = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id)
+            else:
+                PubMedLink = HT.Span("Unpublished : ", Class="fs15")
+
+            if formName:
+                setDescription2 = HT.Href(url="javascript:showDatabase3('%s','%s','%s','')" % (formName, self.dataset.name, self.name), Class = "fs14")
+            else:
+                setDescription2 = HT.Href(url="javascript:showDatabase2('%s','%s','')" % (self.dataset.name,self.name), Class = "fs14")
+
+            if self.confidential and not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=privilege, userName=userName, authorized_users=authorized_users):
+                setDescription2.append('RecordID/%s - %s' % (self.name, self.pre_publication_description))
+            else:
+                setDescription2.append('RecordID/%s - %s' % (self.name, self.post_publication_description))
+
+            #XZ 03/26/2011: Xiaodong comment out the following two lines as Rob asked. Need to check with Rob why in PublishXRef table, there are few row whose Sequence > 1.
+            #if self.sequence > 1:
+            #       setDescription2.append(' btach %d' % self.sequence)
+            if self.authors:
+                a1 = string.split(self.authors,',')[0].lstrip('"\'')  # first author, leading quotes dropped (safe on empty)
+                setDescription2.append(' by ')
+                setDescription2.append(HT.Italic('%s, and colleagues' % a1))
+            setDescription = HT.Span(PubMedLink, setDescription2)
+
+        elif self.dataset.type == 'Temp':
+            setDescription = HT.Href(text="%s" % (self.description),url="javascript:showDatabase2\
+            ('%s','%s','')" % (self.dataset.name,self.name), Class = "fs14")
+            setDescription = HT.Span(setDescription)
+
+        elif self.dataset.type == 'Geno': # Genome DB only available for single search
+            if formName:
+                setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
+        '%2.3f' % self.mb),url="javascript:showDatabase3('%s','%s','%s','')" % \
+        (formName, self.dataset.name, self.name), Class = "fs14")
+            else:
+                setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
+        '%2.3f' % self.mb),url="javascript:showDatabase2('%s','%s','')" % \
+        (self.dataset.name,self.name), Class = "fs14")
+
+            setDescription = HT.Span(setDescription)
+
+        else:
+            if self.cellid:
+                if formName:
+                    setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name, self.cellid),url=\
+            "javascript:showDatabase3('%s','%s','%s','%s')" % (formName, self.dataset.name,self.name,self.cellid), \
+            Class = "fs14")
+                else:
+                    setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name,self.cellid),url=\
+            "javascript:showDatabase2('%s','%s','%s')" % (self.dataset.name,self.name,self.cellid), \
+            Class = "fs14")
+            else:
+                if formName:
+                    setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
+            "javascript:showDatabase3('%s','%s','%s','')" % (formName, self.dataset.name,self.name), \
+            Class = "fs14")
+                else:
+                    setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
+            "javascript:showDatabase2('%s','%s','')" % (self.dataset.name,self.name), \
+            Class = "fs14")
+            if self.symbol and self.chr and self.mb:
+                setDescription.append(' [')
+                setDescription.append(HT.Italic('%s' % self.symbol,Class="cdg fwb"))
+                setDescription.append(' on Chr %s @ %s Mb]' % (self.chr,self.mb))
+            if self.description:
+                setDescription.append(': %s' % self.description)
+            if self.probe_target_description:
+                setDescription.append('; %s' % self.probe_target_description)
+            setDescription = HT.Span(setDescription)
+
+        if self.dataset.type != 'Temp' and dispFromDatabase:
+            setDescription.append( ' --- FROM : ')
+            setDescription.append(self.dataset.genHTML(Class='cori'))
+        return setDescription
+
+    @property
+    def description_fmt(self):
+        '''Return the capitalized description ("Not available" when there is none)'''
+        text = self.description
+        if not text:
+            return "Not available".capitalize()
+        target = self.probe_target_description
+        if target:
+            text = text + "; " + target
+        return text.capitalize()
+
+    @property
+    def alias_fmt(self):
+        '''Return the aliases joined with ", ", or "Not available" when there are none'''
+        if not self.alias:
+            return 'Not available'  # previously fell through to an unbound local
+        alias = string.replace(self.alias, ";", " ")
+        return string.join(string.split(alias), ", ")
+
+
+    @property
+    def location_fmt(self):
+        '''Return the location as text, followed by the strand when it is known
+
+        As a side effect self.location is set to the text without the strand part
+
+        '''
+
+        if not self.chr:
+            self.location = 'Not available'
+        elif self.mb:
+            self.location = 'Chr %s @ %s Mb'  % (self.chr,self.mb)
+        else:
+            self.location = 'Chr %s @ Unknown position' % (self.chr)
+
+        ##XZ: deal with direction
+        strand_text = {
+            '+': ' on the plus strand ',
+            '-': ' on the minus strand ',
+        }
+        fmt = self.location + strand_text.get(self.strand_probe, '')
+
+        return fmt
+
+
+    def get_database(self):
+        """
+        Returns a dict with the database name, and the url referring to the database if it exists
+
+        We're going to return two values here, and we don't want to have to call this twice from
+        the template. So it's not a property called from the template, but instead is called from the view
+
+        """
+        if self.cellid:
+            # Trait has a cellid: look up the ProbeFreeze name through g.db; url is None in this case
+            query = """
+                            select ProbeFreeze.Name from ProbeFreeze, ProbeSetFreeze
+                                    where
+                            ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
+                            ProbeSetFreeze.Id = %d""" % self.dataset.id
+            probeDBName = g.db.execute(query).fetchone()[0]
+            return dict(name = probeDBName, url = None)
+        else:
+            return dict(name = self.dataset.fullname,
+                        url = webqtlConfig.INFOPAGEHREF % self.dataset.name)
+
+    def calculate_correlation(self, values, method):
+        """Calculate the correlation value and p value according to the method specified
+
+        values -- values of the trait being correlated against, paired by index
+                  with self.raw_values; entries equal to the string "None" are dropped
+        method -- METHOD_SAMPLE_PEARSON, METHOD_LIT and METHOD_TISSUE_PEARSON use
+                  webqtlUtil.calCorrelation, anything else calCorrelationRank
+
+        Sets self.correlation, self.overlap and self.p_value, and replaces
+        self.raw_values with the filtered list.
+        """
+
+        #ZS: Keep only the positions where the other trait has a value; self.raw_values is cut down to match
+        #There's probably a better way of dealing with this, but I'll have to ask Christian
+        kept = [i for i, value in enumerate(values) if value != "None"]
+        self.raw_values = [self.raw_values[i] for i in kept]
+        values = [values[i] for i in kept]
+
+        if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON:
+            correlate = webqtlUtil.calCorrelation
+        else:
+            correlate = webqtlUtil.calCorrelationRank
+        self.correlation, self.overlap = correlate(self.raw_values, values, len(values))
+
+        r, n = self.correlation, self.overlap
+        if n < 3:
+            self.p_value = 1.0
+        elif abs(r) >= 1.0:
+            #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to be prevented.
+            self.p_value = 0.0
+        else:
+            fisher_z = 0.5*log((1.0+r)/(1.0-r))*sqrt(n-3)
+            self.p_value = 2.0*(1.0 - reaper.normp(abs(fisher_z)))
diff --git a/wqflask/base/webqtlTrait.py b/wqflask/base/webqtlTrait.py
deleted file mode 100755
index 5367b41f..00000000
--- a/wqflask/base/webqtlTrait.py
+++ /dev/null
@@ -1,695 +0,0 @@
-from __future__ import division, print_function
-
-import string
-
-from htmlgen import HTMLgen2 as HT
-
-import webqtlConfig
-from webqtlCaseData import webqtlCaseData
-from data_set import create_dataset
-from dbFunction import webqtlDatabaseFunction
-from utility import webqtlUtil
-
-from MySQLdb import escape_string as escape
-from pprint import pformat as pf
-
-from flask import Flask, g
-
-class GeneralTrait:
-    """
-    Trait class defines a trait in webqtl, can be either Microarray,
-    Published phenotype, genotype, or user input trait
-
-    """
-
-    def __init__(self, **kw):
-        print("in GeneralTrait")
-        self.dataset = kw.get('dataset', None)                  # database object
-        self.name = kw.get('name', None)                 # Trait ID, ProbeSet ID, Published ID, etc.
-        self.cellid = kw.get('cellid', None)
-        self.identification = kw.get('identification', 'un-named trait')
-        self.group = kw.get('group', None)
-        self.haveinfo = kw.get('haveinfo', False)
-        self.sequence = kw.get('sequence', None)              # Blat sequence, available for ProbeSet
-        self.data = kw.get('data', {})
-        
-        if kw.get('fullname'):
-            name2 = value.split("::")
-            if len(name2) == 2:
-                self.dataset, self.name = name2
-            elif len(name2) == 3:
-                self.dataset, self.name, self.cellid = name2
-                
-        #if self.dataset and isinstance(self.dataset, basestring):
-        self.dataset = create_dataset(self.dataset.name)
-
-        print("self.dataset is:", self.dataset, type(self.dataset))
-        #if self.dataset:
-        
-        self.dataset.get_group()
-        
-        if self.dataset.type == "Temp":
-            self.cursor.execute('''
-                    SELECT
-                            InbredSet.Name
-                    FROM
-                            InbredSet, Temp
-                    WHERE
-                            Temp.InbredSetId = InbredSet.Id AND
-                            Temp.Name = "%s"
-            ''', self.name)
-            self.group = self.cursor.fetchone()[0]
-        else:
-            self.group = self.dataset.get_group()
-
-        print("trinity, self.group is:", self.group)
-
-        #
-        # In ProbeSet, there are maybe several annotations match one sequence
-        # so we need use sequence(BlatSeq) as the identification, when we update
-        # one annotation, we update the others who match the sequence also.
-        #
-        # Hongqiang Li, 3/3/2008
-        #
-
-        #XZ, 05/08/2009: This block is not neccessary. We can add 'BlatSeq' into disfield.
-        # The variable self.sequence should be changed to self.BlatSeq
-        # It also should be changed in other places where it are used.
-
-        #if self.dataset:
-        if self.dataset.type == 'ProbeSet':
-            print("Doing ProbeSet Query")
-            query = '''
-                    SELECT
-                            ProbeSet.BlatSeq
-                    FROM
-                            ProbeSet, ProbeSetFreeze, ProbeSetXRef
-                    WHERE
-                            ProbeSet.Id=ProbeSetXRef.ProbeSetId and
-                            ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
-                            ProbeSet.Name = %s and
-                            ProbeSetFreeze.Name = %s
-            ''', (self.name, self.dataset.name)
-            print("query is:", query)
-            self.sequence = g.db.execute(*query).fetchone()[0]
-            #self.sequence = self.cursor.fetchone()[0]
-            print("self.sequence is:", self.sequence)
-
-
-    def getName(self):
-        str = ""
-        if self.dataset and self.name:
-            str = "%s::%s" % (self.dataset, self.name)
-            if self.cellid:
-                str += "::" + self.cellid
-        else:
-            str = self.description
-        return str
-
-    #
-    # when user enter a trait or GN generate a trait, user want show the name
-    # not the name that generated by GN randomly, the two follow function are
-    # used to give the real name and the database. displayName() will show the
-    # database also, getGivenName() just show the name.
-    # For other trait, displayName() as same as getName(), getGivenName() as
-    # same as self.name
-    #
-    # Hongqiang 11/29/07
-    #
-    def getGivenName(self):
-        str = self.name
-        if self.dataset and self.name:
-            if self.dataset.type=='Temp':
-                self.cursor.execute('SELECT description FROM Temp WHERE Name=%s', self.name)
-                desc = self.cursor.fetchone()[0]
-                if desc.__contains__('PCA'):
-                    desc = desc[desc.rindex(':')+1:].strip()
-                else:
-                    desc = desc[:desc.index('entered')].strip()
-                str = desc
-        return str
-
-    def displayName(self):
-        str = ""
-        if self.dataset and self.name:
-            if self.dataset.type=='Temp':
-                desc = self.description
-                if desc.__contains__('PCA'):
-                    desc = desc[desc.rindex(':')+1:].strip()
-                else:
-                    desc = desc[:desc.index('entered')].strip()
-                str = "%s::%s" % (self.dataset, desc)
-            else:
-                str = "%s::%s" % (self.dataset, self.name)
-                if self.cellid:
-                    str += "::" + self.cellid
-        else:
-            str = self.description
-
-        return str
-
-
-    #def __str__(self):
-    #       #return "%s %s" % (self.getName(), self.group)
-    #       return self.getName()
-    #__str__ = getName
-    #__repr__ = __str__
-
-    def exportData(self, samplelist, type="val"):
-        """
-                export data according to samplelist
-                mostly used in calculating correlation
-        """
-        result = []
-        for sample in samplelist:
-            if self.data.has_key(sample):
-                if type=='val':
-                    result.append(self.data[sample].val)
-                elif type=='var':
-                    result.append(self.data[sample].var)
-                elif type=='N':
-                    result.append(self.data[sample].N)
-                else:
-                    raise KeyError, `type`+' type is incorrect.'
-            else:
-                result.append(None)
-        return result
-
-    def exportInformative(self, incVar=0):
-        """
-                export informative sample
-                mostly used in qtl regression
-        """
-        samples = []
-        vals = []
-        vars = []
-        for sample, value in self.data.items():
-            if value.val != None:
-                if not incVar or value.var != None:
-                    samples.append(sample)
-                    vals.append(value.val)
-                    vars.append(value.var)
-        return  samples, vals, vars
-
-
-    #
-    # In ProbeSet, there are maybe several annotations match one sequence
-    # so we need use sequence(BlatSeq) as the identification, when we update
-    # one annotation, we update the others who match the sequence also.
-    #
-    # Hongqiang Li, 3/3/2008
-    #
-    def getSequence(self):
-        assert self.cursor
-        if self.dataset.type == 'ProbeSet':
-            self.cursor.execute('''
-                            SELECT
-                                    ProbeSet.BlatSeq
-                            FROM
-                                    ProbeSet, ProbeSetFreeze, ProbeSetXRef
-                            WHERE
-                                    ProbeSet.Id=ProbeSetXRef.ProbeSetId and
-                                    ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
-                                    ProbeSet.Name = %s
-                                    ProbeSetFreeze.Name = %s
-                    ''', self.name, self.dataset.name)
-            #self.cursor.execute(query)
-            results = self.fetchone()
-
-            return results[0]
-
-
-
-    def retrieveData(self, samplelist=None):
-
-        if samplelist == None:
-            samplelist = []
-        assert self.dataset and self.cursor
-
-        if self.dataset.type == 'Temp':
-            query = '''
-                    SELECT
-                            Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
-                    FROM
-                            TempData, Temp, Strain
-                    WHERE
-                            TempData.StrainId = Strain.Id AND
-                            TempData.Id = Temp.DataId AND
-                            Temp.name = '%s'
-                    Order BY
-                            Strain.Name
-                    ''' % self.name
-        #XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE
-        elif self.dataset.type == 'Publish':
-            query = '''
-                    SELECT
-                            Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
-                    FROM
-                            (PublishData, Strain, PublishXRef, PublishFreeze)
-                    left join PublishSE on
-                            (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
-                    left join NStrain on
-                            (NStrain.DataId = PublishData.Id AND
-                            NStrain.StrainId = PublishData.StrainId)
-                    WHERE
-                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
-                            PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
-                            PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
-                    Order BY
-                            Strain.Name
-                    ''' % (self.name, self.dataset.id)
-
-        #XZ, 03/02/2009: Xiaodong changed Data to ProbeData, SE to ProbeSE
-        elif self.cellid:
-            #Probe Data
-            query = '''
-                    SELECT
-                            Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id
-                    FROM
-                            (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef,
-                            Strain, Probe, ProbeSet)
-                    left join ProbeSE on
-                            (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
-                    WHERE
-                            Probe.Name = '%s' AND ProbeSet.Name = '%s' AND
-                            Probe.ProbeSetId = ProbeSet.Id AND
-                            ProbeXRef.ProbeId = Probe.Id AND
-                            ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND
-                            ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
-                            ProbeSetFreeze.Name = '%s' AND
-                            ProbeXRef.DataId = ProbeData.Id AND
-                            ProbeData.StrainId = Strain.Id
-                    Order BY
-                            Strain.Name
-                    ''' % (self.cellid, self.name, self.dataset.name)
-        #XZ, 03/02/2009: Xiaodong added this block for ProbeSetData and ProbeSetSE
-        elif self.dataset.type == 'ProbeSet':
-            #ProbeSet Data
-            query = '''
-                    SELECT
-                            Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
-                    FROM
-                            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
-                    left join ProbeSetSE on
-                            (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
-                    WHERE
-                            ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
-                            ProbeSetFreeze.Name = '%s' AND
-                            ProbeSetXRef.DataId = ProbeSetData.Id AND
-                            ProbeSetData.StrainId = Strain.Id
-                    Order BY
-                            Strain.Name
-                    ''' % (self.name, self.dataset.name)
-        #XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE
-        else:
-            #Geno Data
-            #XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search.
-            query = '''
-                    SELECT
-                            Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
-                    FROM
-                            (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
-                    left join GenoSE on
-                            (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
-                    WHERE
-                            Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
-                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
-                            GenoFreeze.Name = '%s' AND
-                            GenoXRef.DataId = GenoData.Id AND
-                            GenoData.StrainId = Strain.Id
-                    Order BY
-                            Strain.Name
-                    ''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group), self.name, self.dataset.name)
-
-
-        self.cursor.execute(query)
-        results = self.cursor.fetchall()
-        self.data.clear()
-
-        if results:
-            self.mysqlid = results[0][-1]
-            #if samplelist:
-            for item in results:
-                #name, value, variance, num_cases = item
-                if not samplelist or (samplelist and name in samplelist):
-                    #if value != None:
-                    #    num_cases = None
-                    #    if self.dataset.type in ('Publish', 'Temp'):
-                    #        ndata = item[3]
-                    name = item[0]
-                    self.data[name] = webqtlCaseData(*item)   #name, value, variance, num_cases)
-                #end for
-        #    else:
-        #        for item in results:
-        #            val = item[1]
-        #            if val != None:
-        #                var = item[2]
-        #                ndata = None
-        #                if self.dataset.type in ('Publish', 'Temp'):
-        #                    ndata = item[3]
-        #                self.data[item[0]] = webqtlCaseData(val, var, ndata)
-        #        #end for
-        #    #end if
-        #else:
-        #    pass
-
-    #def keys(self):
-    #    return self.__dict__.keys()
-    #
-    #def has_key(self, key):
-    #    return self.__dict__.has_key(key)
-    #
-    #def items(self):
-    #    return self.__dict__.items()
-
-    def retrieve_info(self, QTL=False):
-        assert self.dataset, "Dataset doesn't exist"
-        if self.dataset.type == 'Publish':
-            query = """
-                    SELECT
-                            PublishXRef.Id, Publication.PubMed_ID,
-                            Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
-                            Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
-                            Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
-                            Publication.Authors, Publication.Title, Publication.Abstract,
-                            Publication.Journal, Publication.Volume, Publication.Pages,
-                            Publication.Month, Publication.Year, PublishXRef.Sequence,
-                            Phenotype.Units, PublishXRef.comments
-                    FROM
-                            PublishXRef, Publication, Phenotype, PublishFreeze
-                    WHERE
-                            PublishXRef.Id = %s AND
-                            Phenotype.Id = PublishXRef.PhenotypeId AND
-                            Publication.Id = PublishXRef.PublicationId AND
-                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
-                            PublishFreeze.Id = %s
-                    """ % (self.name, self.dataset.id)
-            traitInfo = g.db.execute(query).fetchone()
-        #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
-        #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
-        elif self.dataset.type == 'ProbeSet':
-            display_fields_string = ', ProbeSet.'.join(self.dataset.display_fields)
-            display_fields_string = 'ProbeSet.' + display_fields_string
-            query = """
-                    SELECT %s
-                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
-                    WHERE
-                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
-                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-                            ProbeSetFreeze.Name = '%s' AND
-                            ProbeSet.Name = '%s'
-                    """ % (display_fields_string, self.dataset.name, self.name)
-            traitInfo = g.db.execute(query).fetchone()
-            print("traitInfo is: ", pf(traitInfo))
-        #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
-        # to avoid the problem of same marker name from different species.
-        elif self.dataset.type == 'Geno':
-            display_fields_string = string.join(self.dataset.display_fields,',Geno.')
-            display_fields_string = 'Geno.' + display_fields_string
-            query = """
-                    SELECT %s
-                    FROM Geno, GenoFreeze, GenoXRef
-                    WHERE
-                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
-                            GenoXRef.GenoId = Geno.Id AND
-                            GenoFreeze.Name = '%s' AND
-                            Geno.Name = '%s'
-                    """ % (display_fields_string, self.dataset.name, self.name)
-            traitInfo = g.db.execute(query).fetchone()
-            print("traitInfo is: ", pf(traitInfo))
-        else: #Temp type
-            query = """SELECT %s FROM %s WHERE Name = %s
-                                     """ % (string.join(self.dataset.display_fields,','),
-                                            self.dataset.type, self.name)
-            traitInfo = g.db.execute(query).fetchone()
-
-
-        #self.cursor.execute(query)
-        #traitInfo = self.cursor.fetchone()
-        if traitInfo:
-            self.haveinfo = True
-
-            #XZ: assign SQL query result to trait attributes.
-            for i, field in enumerate(self.dataset.display_fields):
-                setattr(self, field, traitInfo[i])
-
-            if self.dataset.type == 'Publish':
-                self.confidential = 0
-                if self.pre_publication_description and not self.pubmed_id:
-                    self.confidential = 1
-
-            self.homologeneid = None
-            if self.dataset.type == 'ProbeSet' and self.group and self.geneid:
-                #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
-                #XZ: So I have to test if geneid is number before execute the query.
-                #XZ: The geneid values in database should be cleaned up.
-                try:
-                    junk = float(self.geneid)
-                    geneidIsNumber = 1
-                except:
-                    geneidIsNumber = 0
-
-                if geneidIsNumber:
-                    result = g.db.execute("""
-                            SELECT
-                                    HomologeneId
-                            FROM
-                                    Homologene, Species, InbredSet
-                            WHERE
-                                    Homologene.GeneId =%s AND
-                                    InbredSet.Name = '%s' AND
-                                    InbredSet.SpeciesId = Species.Id AND
-                                    Species.TaxonomyId = Homologene.TaxonomyId
-                            """, (self.geneid, self.group)).fetchone()
-                    #self.cursor.execute(query)
-                    #result = self.cursor.fetchone()
-                else:
-                    result = None
-
-                if result:
-                    self.homologeneid = result[0]
-
-            if QTL:
-                if self.dataset.type == 'ProbeSet' and not self.cellid:
-                    traitQTL = g.db.execute("""
-                            SELECT
-                                    ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean
-                            FROM
-                                    ProbeSetXRef, ProbeSet
-                            WHERE
-                                    ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-                                    ProbeSet.Name = "%s" AND
-                                    ProbeSetXRef.ProbeSetFreezeId =%s
-                            """, (self.name, self.dataset.id)).fetchone()
-                    #self.cursor.execute(query)
-                    #traitQTL = self.cursor.fetchone()
-                    if traitQTL:
-                        self.locus, self.lrs, self.pvalue, self.mean = traitQTL
-                    else:
-                        self.locus = self.lrs = self.pvalue = self.mean = ""
-                if self.dataset.type == 'Publish':
-                    traitQTL = g.db.execute("""
-                            SELECT
-                                    PublishXRef.Locus, PublishXRef.LRS
-                            FROM
-                                    PublishXRef, PublishFreeze
-                            WHERE
-                                    PublishXRef.Id = %s AND
-                                    PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
-                                    PublishFreeze.Id =%s
-                            """, (self.name, self.dataset.id)).fetchone()
-                    #self.cursor.execute(query)
-                    #traitQTL = self.cursor.fetchone()
-                    if traitQTL:
-                        self.locus, self.lrs = traitQTL
-                    else:
-                        self.locus = self.lrs = ""
-        else:
-            raise KeyError, `self.name`+' information is not found in the database.'
-
-    def genHTML(self, formName = "", dispFromDatabase=0, privilege="guest", userName="Guest", authorized_users=""):
-        if not self.haveinfo:
-            self.retrieveInfo()
-
-        if self.dataset.type == 'Publish':
-            PubMedLink = ""
-            if self.pubmed_id:
-                PubMedLink = HT.Href(text="PubMed %d : " % self.pubmed_id,
-                target = "_blank", url = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id)
-            else:
-                PubMedLink = HT.Span("Unpublished : ", Class="fs15")
-
-            if formName:
-                setDescription2 = HT.Href(url="javascript:showDatabase3('%s','%s','%s','')" %
-                (formName, self.dataset.name, self.name), Class = "fs14")
-            else:
-                setDescription2 = HT.Href(url="javascript:showDatabase2('%s','%s','')" %
-                (self.dataset.name,self.name), Class = "fs14")
-
-            if self.confidential and not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=privilege, userName=userName, authorized_users=authorized_users):
-                setDescription2.append('RecordID/%s - %s' % (self.name, self.pre_publication_description))
-            else:
-                setDescription2.append('RecordID/%s - %s' % (self.name, self.post_publication_description))
-
-            #XZ 03/26/2011: Xiaodong comment out the following two lins as Rob asked. Need to check with Rob why in PublishXRef table, there are few row whose Sequence > 1.
-            #if self.sequence > 1:
-            #       setDescription2.append(' btach %d' % self.sequence)
-            if self.authors:
-                a1 = string.split(self.authors,',')[0]
-                while a1[0] == '"' or a1[0] == "'" :
-                    a1 = a1[1:]
-                setDescription2.append(' by ')
-                setDescription2.append(HT.Italic('%s, and colleagues' % a1))
-            setDescription = HT.Span(PubMedLink, setDescription2)
-
-        elif self.dataset.type == 'Temp':
-            setDescription = HT.Href(text="%s" % (self.description),url="javascript:showDatabase2\
-            ('%s','%s','')" % (self.dataset.name,self.name), Class = "fs14")
-            setDescription = HT.Span(setDescription)
-
-        elif self.dataset.type == 'Geno': # Genome DB only available for single search
-            if formName:
-                setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
-        '%2.3f' % self.mb),url="javascript:showDatabase3('%s','%s','%s','')" % \
-        (formName, self.dataset.name, self.name), Class = "fs14")
-            else:
-                setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
-        '%2.3f' % self.mb),url="javascript:showDatabase2('%s','%s','')" % \
-        (self.dataset.name,self.name), Class = "fs14")
-
-            setDescription = HT.Span(setDescription)
-
-        else:
-            if self.cellid:
-                if formName:
-                    setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name, self.cellid),url=\
-            "javascript:showDatabase3('%s','%s','%s','%s')" % (formName, self.dataset.name,self.name,self.cellid), \
-            Class = "fs14")
-                else:
-                    setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name,self.cellid),url=\
-            "javascript:showDatabase2('%s','%s','%s')" % (self.dataset.name,self.name,self.cellid), \
-            Class = "fs14")
-            else:
-                if formName:
-                    setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
-            "javascript:showDatabase3('%s','%s','%s','')" % (formName, self.dataset.name,self.name), \
-            Class = "fs14")
-                else:
-                    setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
-            "javascript:showDatabase2('%s','%s','')" % (self.dataset.name,self.name), \
-            Class = "fs14")
-            if self.symbol and self.chr and self.mb:
-                setDescription.append(' [')
-                setDescription.append(HT.Italic('%s' % self.symbol,Class="cdg fwb"))
-                setDescription.append(' on Chr %s @ %s Mb]' % (self.chr,self.mb))
-            if self.description:
-                setDescription.append(': %s' % self.description)
-            if self.probe_target_description:
-                setDescription.append('; %s' % self.probe_target_description)
-            setDescription = HT.Span(setDescription)
-
-        if self.dataset.type != 'Temp' and dispFromDatabase:
-            setDescription.append( ' --- FROM : ')
-            setDescription.append(self.dataset.genHTML(Class='cori'))
-        return setDescription
-
-    @property
-    def description_fmt(self):
-        '''Return a text formated description'''
-        if self.description:
-            formatted = self.description
-            if self.probe_target_description:
-                formatted += "; " + self.probe_target_description
-        else:
-            formatted = "Not available"
-        return formatted.capitalize()
-
-    @property
-    def alias_fmt(self):
-        '''Return a text formatted alias'''
-        if self.alias:
-            alias = string.replace(self.alias, ";", " ")
-            alias = string.join(string.split(alias), ", ")
-        return alias
-
-
-    @property
-    def location_fmt(self):
-        '''Return a text formatted location
-
-        While we're at it we set self.location in case we need it later (do we?)
-
-        '''
-
-        if self.chr and self.mb:
-            self.location = 'Chr %s @ %s Mb'  % (self.chr,self.mb)
-        elif self.chr:
-            self.location = 'Chr %s @ Unknown position' % (self.chr)
-        else:
-            self.location = 'Not available'
-
-        fmt = self.location
-        ##XZ: deal with direction
-        if self.strand_probe == '+':
-            fmt += (' on the plus strand ')
-        elif self.strand_probe == '-':
-            fmt += (' on the minus strand ')
-
-        return fmt
-
-
-    def get_database(self):
-        """
-        Returns the database, and the url referring to the database if it exists
-
-        We're going to to return two values here, and we don't want to have to call this twice from
-        the template. So it's not a property called from the template, but instead is called from the view
-
-        """
-        if self.cellid:
-            self.cursor.execute("""
-                            select ProbeFreeze.Name from ProbeFreeze, ProbeSetFreeze
-                                    where
-                            ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
-                            ProbeSetFreeze.Id = %d""" % thisTrait.dataset.id)
-            probeDBName = self.cursor.fetchone()[0]
-            return dict(name = probeDBName,
-                        url = None)
-        else:
-            return dict(name = self.dataset.fullname,
-                        url = webqtlConfig.INFOPAGEHREF % self.dataset.name)
-
-    def calculate_correlation(self, values, method):
-        """Calculate the correlation value and p value according to the method specified"""
-
-        #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for
-        #There's probably a better way of dealing with this, but I'll have to ask Christian
-        updated_raw_values = []
-        updated_values = []
-        for i in range(len(values)):
-            if values[i] != "None":
-                updated_raw_values.append(self.raw_values[i])
-                updated_values.append(values[i])
-
-        self.raw_values = updated_raw_values
-        values = updated_values
-
-        if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON:
-            corr, nOverlap = webqtlUtil.calCorrelation(self.raw_values, values, len(values))
-        else:
-            corr, nOverlap = webqtlUtil.calCorrelationRank(self.raw_values, values, len(values))
-
-        self.correlation = corr
-        self.overlap = nOverlap
-
-        if self.overlap < 3:
-            self.p_value = 1.0
-        else:
-            #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to br prevented.
-            if abs(self.correlation) >= 1.0:
-                self.p_value = 0.0
-            else:
-                ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
-                ZValue = ZValue*sqrt(self.overlap-3)
-                self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 4301fb50..69602748 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -26,10 +26,11 @@ class DoSearch(object):
         assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator"
         self.search_operator = search_operator
         self.dataset = dataset
+        print("self.dataset is boo: ", type(self.dataset), pf(self.dataset))
+        print("self.dataset.group is: ", pf(self.dataset.group))
 
         #Get group information for dataset and the species id
-        self.dataset.get_group()
-        self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group)           
+        self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name)           
 
     def execute(self, query):
         """Executes query and returns results"""
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index cd478110..7c50dfeb 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -30,7 +30,7 @@ from base import webqtlConfig
 from utility.THCell import THCell
 from utility.TDCell import TDCell
 from base.data_set import create_dataset
-from base.webqtlTrait import GeneralTrait
+from base.trait import GeneralTrait
 from base.templatePage import templatePage
 from wqflask import parser
 from wqflask import do_search
@@ -99,8 +99,7 @@ class SearchResultPage(templatePage):
         """
         self.trait_list = []
         
-        group = self.dataset.group
-        species = webqtlDatabaseFunction.retrieve_species(group=group)        
+        species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)        
         
         # result_set represents the results for each search term; a search of 
         # "shh grin2b" would have two sets of results, one for each term
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index aef9219f..2bc4fc9c 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -13,7 +13,8 @@ from base import webqtlConfig
 from base import webqtlCaseData
 from wqflask.show_trait.SampleList import SampleList
 from utility import webqtlUtil, Plot, Bunch
-from base.webqtlTrait import GeneralTrait
+from base.trait import GeneralTrait
+from base.data_set import create_dataset
 from dbFunction import webqtlDatabaseFunction
 from base.templatePage import templatePage
 from basicStatistics import BasicStatisticsFunctions
@@ -33,105 +34,111 @@ class ShowTrait(templatePage):
 
     def __init__(self, args):
         print("in ShowTrait, args are:", args)
-        self.group = args.group
-        self.trait_id = trait_id
-        self.dataset = dataset
+        #self.group = args.group
+        self.trait_id = args['trait_id']
+        self.dataset = create_dataset(args['dataset'])
+        self.cell_id = None
 
         #assert self.openMysql(), "No database!"
 
         #print("red3 fd.group:", fd.group)
         this_trait = self.get_this_trait()
 
-        print("red4 fd.group:", fd.group)
+        #print("red4 fd.group:", fd.group)
         ##read genotype file
-        fd.group = this_trait.group
+        #fd.group = this_trait.group
 
-        print("[red5] fd.group is:", fd.group)
-        fd.readGenotype()
+        #print("[red5] fd.group is:", fd.group)
+        self.dataset.group.read_genotype_file()
+        #fd.readGenotype()
 
-        if not fd.genotype:
-            fd.readData(incf1=1)
+        if not self.dataset.group.genotype:
+            self.read_data(incf1=1)
 
-        # determine data editing page format
-        variance_data_page = 0
-        if fd.formID == 'varianceChoice':
-            variance_data_page = 1
-
-        if variance_data_page:
-            fmID='dataEditing'
-        else:
-            if fd.enablevariance:
-                fmID='pre_dataEditing'
-            else:
-                fmID='dataEditing'
-
-        # Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery
-        hddn = OrderedDict(
-                FormID = fmID,
-                group = fd.group,
-                submitID = '',
-                scale = 'physic',
-                additiveCheck = 'ON',
-                showSNP = 'ON',
-                showGenes = 'ON',
-                method = None,
-                parentsf14regression = 'OFF',
-                stats_method = '1',
-                chromosomes = '-1',
-                topten = '',
-                viewLegend = 'ON',
-                intervalAnalystCheck = 'ON',
-                valsHidden = 'OFF',
-                database = '',
-                criteria = None,
-                MDPChoice = None,
-                bootCheck = None,
-                permCheck = None,
-                applyVarianceSE = None,
-                sampleNames = '_',
-                sampleVals = '_',
-                sampleVars = '_',
-                otherStrainNames = '_',
-                otherStrainVals = '_',
-                otherStrainVars = '_',
-                extra_attributes = '_',
-                other_extra_attributes = '_',
-                export_data = None
-                )
-
-        if fd.enablevariance:
-            hddn['enablevariance']='ON'
-        if fd.incparentsf1:
-            hddn['incparentsf1']='ON'
-
-        if this_trait:
-            hddn['fullname'] = str(this_trait)
-            try:
-                hddn['normalPlotTitle'] = this_trait.symbol
-                hddn['normalPlotTitle'] += ": "
-                hddn['normalPlotTitle'] += this_trait.name
-            except:
-                hddn['normalPlotTitle'] = str(this_trait.name)
-            hddn['fromDataEditingPage'] = 1
-            if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
-                hddn['trait_type'] = this_trait.dataset.type
-                if this_trait.cellid:
-                    hddn['cellid'] = this_trait.cellid
-                else:
-                    self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
-                                        this_trait.mysqlid)
-                    heritability = self.cursor.fetchone()
-                    hddn['heritability'] = heritability
-
-                hddn['attribute_names'] = ""
-
-        hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor,
-                                                                           groupName=fd.group)
-
-        if fd.identification:
-            hddn['identification'] = fd.identification
-        else:
-            hddn['identification'] = "Un-named trait"  #If no identification, set identification to un-named
+        ## determine data editing page format
+        #variance_data_page = 0
+        #if fd.formID == 'varianceChoice':
+        #    variance_data_page = 1
+        #
+        #if variance_data_page:
+        #    fmID='dataEditing'
+        #else:
+        #    if fd.enablevariance:
+        #        fmID='pre_dataEditing'
+        #    else:
+        #        fmID='dataEditing'
+        
+        # Todo: Add back in the ones we actually need from below, as we discover we need them
+        hddn = OrderedDict()
+
+    
+        ## Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery
+        #hddn = OrderedDict(
+        #        FormID = fmID,
+        #        group = fd.group,
+        #        submitID = '',
+        #        scale = 'physic',
+        #        additiveCheck = 'ON',
+        #        showSNP = 'ON',
+        #        showGenes = 'ON',
+        #        method = None,
+        #        parentsf14regression = 'OFF',
+        #        stats_method = '1',
+        #        chromosomes = '-1',
+        #        topten = '',
+        #        viewLegend = 'ON',
+        #        intervalAnalystCheck = 'ON',
+        #        valsHidden = 'OFF',
+        #        database = '',
+        #        criteria = None,
+        #        MDPChoice = None,
+        #        bootCheck = None,
+        #        permCheck = None,
+        #        applyVarianceSE = None,
+        #        sampleNames = '_',
+        #        sampleVals = '_',
+        #        sampleVars = '_',
+        #        otherStrainNames = '_',
+        #        otherStrainVals = '_',
+        #        otherStrainVars = '_',
+        #        extra_attributes = '_',
+        #        other_extra_attributes = '_',
+        #        export_data = None
+        #        )
+
+        #if fd.enablevariance:
+        #    hddn['enablevariance']='ON'
+        #if fd.incparentsf1:
+        #    hddn['incparentsf1']='ON'
+
+        #if this_trait:
+        #    hddn['fullname'] = str(this_trait)
+        #    try:
+        #        hddn['normalPlotTitle'] = this_trait.symbol
+        #        hddn['normalPlotTitle'] += ": "
+        #        hddn['normalPlotTitle'] += this_trait.name
+        #    except:
+        #        hddn['normalPlotTitle'] = str(this_trait.name)
+        #    hddn['fromDataEditingPage'] = 1
+        #    if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
+        #        hddn['trait_type'] = this_trait.dataset.type
+        #        if this_trait.cellid:
+        #            hddn['cellid'] = this_trait.cellid
+        #        else:
+        #            self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
+        #                                this_trait.mysqlid)
+        #            heritability = self.cursor.fetchone()
+        #            hddn['heritability'] = heritability
+        #
+        #        hddn['attribute_names'] = ""
+        #
+        #hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor,
+        #                                                                   groupName=fd.group)
+        #
+        #if fd.identification:
+        #    hddn['identification'] = fd.identification
+        #else:
+        #    hddn['identification'] = "Un-named trait"  #If no identification, set identification to un-named
 
         self.dispTraitInformation(fd, "", hddn, this_trait) #Display trait information + function buttons
 
@@ -186,27 +193,109 @@ class ShowTrait(templatePage):
         #trait_id = self.fd['trait_id']
         #cell_id = self.fd.get('CellID')
 
-        this_trait = webqtlTrait(dataset=dataset,
-                                 name=trait_id,
-                                 cellid=cell_id)
+        this_trait = GeneralTrait(dataset=self.dataset.name,
+                                 name=self.trait_id,
+                                 cellid=self.cell_id)
 
         ##identification, etc.
-        self.fd.identification = '%s : %s' % (this_trait.dataset.shortname, trait_id)
+        self.identification = '%s : %s' % (self.dataset.shortname, self.trait_id)
         this_trait.returnURL = webqtlConfig.CGIDIR + webqtlConfig.SCRIPTFILE + '?FormID=showDatabase&database=%s\
-                &ProbeSetID=%s&group=%s&parentsf1=on' %(dataset, trait_id, self.fd['group'])
+                &ProbeSetID=%s&group=%s&parentsf1=on' %(self.dataset, self.trait_id, self.dataset.group.name)
 
-        if cell_id:
-            self.fd.identification = '%s/%s'%(self.fd.identification, cell_id)
-            this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, cell_id)
+        if self.cell_id:
+            self.identification = '%s/%s'%(self.identification, self.cell_id)
+            this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, self.cell_id)
 
-        print("yellow1:", self.group)
-        this_trait.retrieveInfo()
-        print("yellow2:", self.group)
-        this_trait.retrieveData()
-        print("yellow3:", self.group)
+        print("yellow1:", self.dataset.group)
+        this_trait.retrieve_info()
+        print("yellow2:", self.dataset.group)
+        this_trait.retrieve_sample_data()
+        print("yellow3:", self.dataset.group)
         return this_trait
 
 
+    def read_data(self, incf1=None, samplelist=None):
+        '''Read per-sample trait values, variances and N into self.allTraitData.
+
+        Stands in for the fd.readData(incf1=1) call previously made by __init__.
+
+        incf1      -- when true and no samplelist is given, self.f1list is put
+                      in front of self.samplelist
+        samplelist -- sample names to read; defaults to self.samplelist
+
+        Samples whose value is None are left out of self.allTraitData.
+        '''
+
+        # NOTE(review): genotype, readGenotype, f1list, samplelist, FormVarianceAsFloat
+        # and FormNAsFloat are looked up on self; this change does not show them being
+        # defined on this class -- confirm before relying on this method.
+        if not self.genotype:
+            self.readGenotype()
+        if not samplelist:
+            if incf1:
+                samplelist = self.f1list + self.samplelist
+            else:
+                samplelist = self.samplelist
+
+        traitfiledata = getattr(self, "traitfile", None)
+        traitpastedata = getattr(self, "traitpaste", None)
+        variancefiledata = getattr(self, "variancefile", None)
+        variancepastedata = getattr(self, "variancepaste", None)
+        Nfiledata = getattr(self, "Nfile", None)
+
+        #### Todo: Rewrite below when we get to someone submitting their own trait #####
+
+        def to_float(item):
+            try:
+                return float(item)
+            except (TypeError, ValueError):
+                return None
+
+        def fit_to_samples(items):
+            '''Pad with None or truncate so there is one entry per sample'''
+            items = list(items)
+            if len(items) < len(samplelist):
+                items += [None] * (len(samplelist) - len(items))
+            return items[:len(samplelist)]
+
+        print("bottle samplelist is:", samplelist)
+        if traitfiledata:
+            values = map(webqtlUtil.StringAsFloat, traitfiledata.split())
+        elif traitpastedata:
+            values = map(webqtlUtil.StringAsFloat, traitpastedata.split())
+        else:
+            print("mapping formdataasfloat")
+            #values = map(self.FormDataAsFloat, samplelist)
+            values = [to_float(getattr(self, key)) for key in samplelist]
+        print("rocket values is:", values)
+        values = fit_to_samples(values)
+        print("now values is:", values)
+
+        if variancefiledata:
+            variances = map(webqtlUtil.StringAsFloat, variancefiledata.split())
+        elif variancepastedata:
+            variances = map(webqtlUtil.StringAsFloat, variancepastedata.split())
+        else:
+            variances = map(self.FormVarianceAsFloat, samplelist)
+        variances = fit_to_samples(variances)
+
+        if Nfiledata:
+            nsamples = map(webqtlUtil.IntAsFloat, Nfiledata.split())
+        else:
+            nsamples = map(self.FormNAsFloat, samplelist)
+        nsamples = fit_to_samples(nsamples)
+
+        # NOTE(review): the file imports this name with `from base import webqtlCaseData`;
+        # if that is the module, the call below needs webqtlCaseData.webqtlCaseData.
+        ##values, variances, nsamples is obsolete
+        self.allTraitData = {}
+        for i, _sample in enumerate(samplelist):
+            if values[i] is not None:
+                self.allTraitData[_sample] = webqtlCaseData(
+                    _sample, values[i], variances[i], nsamples[i])
+        print("allTraitData is:", pf(self.allTraitData))
+        
+
     def dispTraitInformation(self, fd, title1Body, hddn, this_trait):
 
         _Species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, group=fd.group)
-- 
cgit 1.4.1