3 files changed, 480 insertions, 511 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ae7fdcb5..41c5d8ba 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -44,7 +44,7 @@ from db import webqtlDatabaseFunction
 from utility import webqtlUtil
 from utility.benchmark import Bench
 from utility import chunks
-from utility.tools import locate, locate_ignore_error
+from utility.tools import locate, locate_ignore_error, flat_files
 
 from maintenance import get_group_samplelists
 
@@ -53,7 +53,7 @@ from pprint import pformat as pf
 from db.gn_server import menu_main
 from db.call import fetchall,fetchone,fetch1
 
-from utility.tools import USE_GN_SERVER, USE_REDIS
+from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists
 from utility.logger import getLogger
 logger = getLogger(__name__ )
 
@@ -226,7 +226,7 @@ class Markers(object):
 class HumanMarkers(Markers):
 
     def __init__(self, name, specified_markers = []):
-        marker_data_fh = open(locate('genotype') + '/' + name + '.bim')
+        marker_data_fh = open(flat_files('mapping') + '/' + name + '.bim')
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
@@ -271,7 +271,8 @@ class DatasetGroup(object):
         self.f1list = None
         self.parlist = None
         self.get_f1_parent_strains()
-        #logger.debug("parents/f1s: {}:{}".format(self.parlist, self.f1list))
+
+        self.accession_id = self.get_accession_id()
 
         self.species = webqtlDatabaseFunction.retrieve_species(self.name)
 
@@ -280,15 +281,39 @@ class DatasetGroup(object):
         self._datasets = None
         self.genofile = None
 
+    def get_accession_id(self):
+        results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
+                    InbredSet.Name = %s and
+                    PublishFreeze.InbredSetId = InbredSet.Id and
+                    InfoFiles.InfoPageName = PublishFreeze.Name and
+                    PublishFreeze.public > 0 and
+                    PublishFreeze.confidentiality < 1 order by
+                    PublishFreeze.CreateTime desc""", (self.name)).fetchone()
+
+        if results != None:
+            return str(results[0])
+        else:
+            return "None"
+
     def get_specified_markers(self, markers = []):
         self.markers = HumanMarkers(self.name, markers)
 
     def get_markers(self):
-        #logger.debug("self.species is:", self.species)
-        if self.species == "human":
+        logger.debug("self.species is:", self.species)
+
+        def check_plink_gemma():
+            if flat_file_exists("mapping"):
+                MAPPING_PATH = flat_files("mapping")+"/"
+                if (os.path.isfile(MAPPING_PATH+self.name+".bed") and
+                    (os.path.isfile(MAPPING_PATH+self.name+".map") or
+                     os.path.isfile(MAPPING_PATH+self.name+".bim"))):
+                    return True
+            return False
+
+        if check_plink_gemma():
             marker_class = HumanMarkers
         else:
-            marker_class = Markers
+            marker_class = Markers            
 
         self.markers = marker_class(self.name)
 
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 276c624a..bf87e879 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -23,6 +23,8 @@ from flask import Flask, g, request
 from utility.logger import getLogger
 logger = getLogger(__name__ )
 
+from wqflask import user_manager
+
 def print_mem(stage=""):
     mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
     print("{}: {}".format(stage, mem/1024))
@@ -55,6 +57,7 @@ class GeneralTrait(object):
         self.lrs = None
         self.pvalue = None
         self.mean = None
+        self.additive = None
         self.num_overlap = None
         self.strand_probe = None
         self.symbol = None
@@ -69,45 +72,9 @@ class GeneralTrait(object):
 
         # Todo: These two lines are necessary most of the time, but perhaps not all of the time
         # So we could add a simple if statement to short-circuit this if necessary
-        self.retrieve_info(get_qtl_info=get_qtl_info)
+        self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info)
         if get_sample_info != False:
-            self.retrieve_sample_data()
-
-
-    def jsonable(self):
-        """Return a dict suitable for using as json
-
-        Actual turning into json doesn't happen here though"""
-
-        if self.dataset.type == "ProbeSet":
-            return dict(name=self.name,
-                        symbol=self.symbol,
-                        dataset=self.dataset.name,
-                        description=self.description_display,
-                        mean=self.mean,
-                        location=self.location_repr,
-                        lrs_score=self.LRS_score_repr,
-                        lrs_location=self.LRS_location_repr,
-                        additive=self.additive
-                        )
-        elif self.dataset.type == "Publish":
-            return dict(name=self.name,
-                        dataset=self.dataset.name,
-                        description=self.description_display,
-                        authors=self.authors,
-                        pubmed_text=self.pubmed_text,
-                        pubmed_link=self.pubmed_link,
-                        lrs_score=self.LRS_score_repr,
-                        lrs_location=self.LRS_location_repr,
-                        additive=self.additive
-                        )
-        elif self.dataset.type == "Geno":
-            return dict(name=self.name,
-                        dataset=self.dataset.name,
-                        location=self.location_repr
-                        )
-        else:
-            return dict()
+            self = retrieve_sample_data(self, self.dataset)
 
 
     def get_name(self):
@@ -213,410 +180,6 @@ class GeneralTrait(object):
         return  samples, vals, the_vars, sample_aliases
 
 
-    #
-    # In ProbeSet, there are maybe several annotations match one sequence
-    # so we need use sequence(BlatSeq) as the identification, when we update
-    # one annotation, we update the others who match the sequence also.
-    #
-    # Hongqiang Li, 3/3/2008
-    #
-    #def getSequence(self):
-    #    assert self.cursor
-    #    if self.dataset.type == 'ProbeSet':
-    #        self.cursor.execute('''
-    #                        SELECT
-    #                                ProbeSet.BlatSeq
-    #                        FROM
-    #                                ProbeSet, ProbeSetFreeze, ProbeSetXRef
-    #                        WHERE
-    #                                ProbeSet.Id=ProbeSetXRef.ProbeSetId and
-    #                                ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
-    #                                ProbeSet.Name = %s
-    #                                ProbeSetFreeze.Name = %s
-    #                ''', self.name, self.dataset.name)
-    #        #self.cursor.execute(query)
-    #        results = self.fetchone()
-    #
-    #        return results[0]
-
-
-
-    def retrieve_sample_data(self, samplelist=None):
-        if samplelist == None:
-            samplelist = []
-
-        results = self.dataset.retrieve_sample_data(self.name)
-
-        # Todo: is this necessary? If not remove
-        self.data.clear()
-
-        all_samples_ordered = self.dataset.group.all_samples_ordered()
-
-        if results:
-            for item in results:
-                name, value, variance, num_cases, name2 = item
-                if not samplelist or (samplelist and name in samplelist):
-                    self.data[name] = webqtlCaseData(*item)   #name, value, variance, num_cases)
-
-    def retrieve_info(self, get_qtl_info=False):
-        assert self.dataset, "Dataset doesn't exist"
-        if self.dataset.type == 'Publish':
-            query = """
-                    SELECT
-                            PublishXRef.Id, Publication.PubMed_ID,
-                            Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
-                            Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
-                            Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
-                            Publication.Authors, Publication.Title, Publication.Abstract,
-                            Publication.Journal, Publication.Volume, Publication.Pages,
-                            Publication.Month, Publication.Year, PublishXRef.Sequence,
-                            Phenotype.Units, PublishXRef.comments
-                    FROM
-                            PublishXRef, Publication, Phenotype, PublishFreeze
-                    WHERE
-                            PublishXRef.Id = %s AND
-                            Phenotype.Id = PublishXRef.PhenotypeId AND
-                            Publication.Id = PublishXRef.PublicationId AND
-                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
-                            PublishFreeze.Id = %s
-                    """ % (self.name, self.dataset.id)
-
-            logger.sql(query)
-            trait_info = g.db.execute(query).fetchone()
-
-
-        #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
-        #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
-        elif self.dataset.type == 'ProbeSet':
-            display_fields_string = ', ProbeSet.'.join(self.dataset.display_fields)
-            display_fields_string = 'ProbeSet.' + display_fields_string
-            query = """
-                    SELECT %s
-                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
-                    WHERE
-                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
-                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-                            ProbeSetFreeze.Name = '%s' AND
-                            ProbeSet.Name = '%s'
-                    """ % (escape(display_fields_string),
-                           escape(self.dataset.name),
-                           escape(str(self.name)))
-            logger.sql(query)
-            trait_info = g.db.execute(query).fetchone()
-        #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
-        # to avoid the problem of same marker name from different species.
-        elif self.dataset.type == 'Geno':
-            display_fields_string = string.join(self.dataset.display_fields,',Geno.')
-            display_fields_string = 'Geno.' + display_fields_string
-            query = """
-                    SELECT %s
-                    FROM Geno, GenoFreeze, GenoXRef
-                    WHERE
-                            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
-                            GenoXRef.GenoId = Geno.Id AND
-                            GenoFreeze.Name = '%s' AND
-                            Geno.Name = '%s'
-                    """ % (escape(display_fields_string),
-                           escape(self.dataset.name),
-                           escape(self.name))
-            logger.sql(query)
-            trait_info = g.db.execute(query).fetchone()
-        else: #Temp type
-            query = """SELECT %s FROM %s WHERE Name = %s"""
-            logger.sql(query)
-            trait_info = g.db.execute(query,
-                                      (string.join(self.dataset.display_fields,','),
-                                                   self.dataset.type, self.name)).fetchone()
-        if trait_info:
-            self.haveinfo = True
-
-            #XZ: assign SQL query result to trait attributes.
-            for i, field in enumerate(self.dataset.display_fields):
-                holder = trait_info[i]
-                if isinstance(trait_info[i], basestring):
-                    holder = unicode(trait_info[i], "utf8", "ignore")
-                setattr(self, field, holder)
-
-            if self.dataset.type == 'Publish':
-                self.confidential = 0
-                if self.pre_publication_description and not self.pubmed_id:
-                    self.confidential = 1
-
-                description = self.post_publication_description
-
-                #If the dataset is confidential and the user has access to confidential
-                #phenotype traits, then display the pre-publication description instead
-                #of the post-publication description
-                if self.confidential:
-                    self.description_display = self.pre_publication_description
-
-                    #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
-                    #        privilege=self.dataset.privilege,
-                    #        userName=self.dataset.userName,
-                    #        authorized_users=self.authorized_users):
-                    #
-                    #    description = self.pre_publication_description
-                else:
-                    if description:
-                        self.description_display = description.strip()
-                    else:
-                        self.description_display = ""
-
-                if not self.year.isdigit():
-                    self.pubmed_text = "N/A"
-                else:
-                    self.pubmed_text = self.year
-
-                if self.pubmed_id:
-                    self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id
-
-
-            self.homologeneid = None
-            if self.dataset.type == 'ProbeSet' and self.dataset.group:
-                if self.geneid:
-                    #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
-                    #XZ: So I have to test if geneid is number before execute the query.
-                    #XZ: The geneid values in database should be cleaned up.
-                    #try:
-                    #    float(self.geneid)
-                    #    geneidIsNumber = True
-                    #except ValueError:
-                    #    geneidIsNumber = False
-                    #if geneidIsNumber:
-                    query = """
-                            SELECT
-                                    HomologeneId
-                            FROM
-                                    Homologene, Species, InbredSet
-                            WHERE
-                                    Homologene.GeneId ='%s' AND
-                                    InbredSet.Name = '%s' AND
-                                    InbredSet.SpeciesId = Species.Id AND
-                                    Species.TaxonomyId = Homologene.TaxonomyId
-                            """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
-                    logger.sql(query)
-                    result = g.db.execute(query).fetchone()
-                    #else:
-                    #    result = None
-
-                    if result:
-                        self.homologeneid = result[0]
-
-                description_string = unicode(str(self.description).strip(codecs.BOM_UTF8), 'utf-8')
-                target_string = unicode(str(self.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
-
-                if len(description_string) > 1 and description_string != 'None':
-                    description_display = description_string
-                else:
-                    description_display = self.symbol
-
-                if (len(description_display) > 1 and description_display != 'N/A' and
-                        len(target_string) > 1 and target_string != 'None'):
-                    description_display = description_display + '; ' + target_string.strip()
-
-                # Save it for the jinja2 template
-                self.description_display = description_display
-
-                #XZ: trait_location_value is used for sorting
-                trait_location_repr = 'N/A'
-                trait_location_value = 1000000
-
-                if self.chr and self.mb:
-                    #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
-                    #This is so we can convert the location to a number used for sorting
-                    trait_location_value = convert_location_to_value(self.chr, self.mb)
-                     #try:
-                    #    trait_location_value = int(self.chr)*1000 + self.mb
-                    #except ValueError:
-                    #    if self.chr.upper() == 'X':
-                    #        trait_location_value = 20*1000 + self.mb
-                    #    else:
-                    #        trait_location_value = (ord(str(self.chr).upper()[0])*1000 +
-                    #                               self.mb)
-
-                    #ZS: Put this in function currently called "convert_location_to_value"
-                    self.location_repr = 'Chr%s: %.6f' % (self.chr, float(self.mb))
-                    self.location_value = trait_location_value
-
-
-            if get_qtl_info:
-                #LRS and its location
-                self.LRS_score_repr = "N/A"
-                self.LRS_score_value = 0
-                self.LRS_location_repr = "N/A"
-                self.LRS_location_value = 1000000
-                if self.dataset.type == 'ProbeSet' and not self.cellid:
-                    query = """
-                            SELECT
-                                    ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
-                            FROM
-                                    ProbeSetXRef, ProbeSet
-                            WHERE
-                                    ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-                                    ProbeSet.Name = "{}" AND
-                                    ProbeSetXRef.ProbeSetFreezeId ={}
-                            """.format(self.name, self.dataset.id)
-                    logger.sql(query)
-                    trait_qtl = g.db.execute(query).fetchone()
-                    if trait_qtl:
-                        self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl
-                        if self.locus:
-                            query = """
-                                select Geno.Chr, Geno.Mb from Geno, Species
-                                where Species.Name = '{}' and
-                                Geno.Name = '{}' and
-                                Geno.SpeciesId = Species.Id
-                                """.format(self.dataset.group.species, self.locus)
-                            logger.sql(query)
-                            result = g.db.execute(query).fetchone()
-                            if result:
-                                self.locus_chr = result[0]
-                                self.locus_mb = result[1]
-                            else:
-                                self.locus = self.locus_chr = self.locus_mb = self.additive = ""
-                        else:
-                            self.locus = self.locus_chr = self.locus_mb = self.additive = ""
-                    else:
-                        self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = self.additive = ""
-
-
-                if self.dataset.type == 'Publish':
-                    query = """
-                            SELECT
-                                    PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
-                            FROM
-                                    PublishXRef, PublishFreeze
-                            WHERE
-                                    PublishXRef.Id = %s AND
-                                    PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
-                                    PublishFreeze.Id =%s
-                    """ % (self.name, self.dataset.id)
-                    logger.sql(query)
-                    trait_qtl = g.db.execute(query).fetchone()
-                    if trait_qtl:
-                        self.locus, self.lrs, self.additive = trait_qtl
-                        if self.locus:
-                            query = """
-                                select Geno.Chr, Geno.Mb from Geno, Species
-                                where Species.Name = '{}' and
-                                Geno.Name = '{}' and
-                                Geno.SpeciesId = Species.Id
-                                """.format(self.dataset.group.species, self.locus)
-                            logger.sql(query)
-                            result = g.db.execute(query).fetchone()
-                            if result:
-                                self.locus_chr = result[0]
-                                self.locus_mb = result[1]
-                            else:
-                                self.locus = self.locus_chr = self.locus_mb = self.additive = ""
-                        else:
-                            self.locus = self.locus_chr = self.locus_mb = self.additive = ""
-                    else:
-                        self.locus = self.lrs = self.additive = ""
-
-                if (self.dataset.type == 'Publish' or self.dataset.type == "ProbeSet") and self.locus_chr != "" and self.locus_mb != "":
-                    #XZ: LRS_location_value is used for sorting
-                    try:
-                        LRS_location_value = int(self.locus_chr)*1000 + float(self.locus_mb)
-                    except:
-                        if self.locus_chr.upper() == 'X':
-                            LRS_location_value = 20*1000 + float(self.locus_mb)
-                        else:
-                            LRS_location_value = ord(str(self.locus_chr).upper()[0])*1000 + float(self.locus_mb)
-
-                    self.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (self.locus_chr, float(self.locus_mb))
-                    if self.lrs != "":
-                        self.LRS_score_repr = LRS_score_repr = '%3.1f' % self.lrs
-                        self.LRS_score_value = LRS_score_value = self.lrs
-        else:
-            raise KeyError, `self.name`+' information is not found in the database.'
-
-    def genHTML(self, formName = "", dispFromDatabase=0, privilege="guest", userName="Guest", authorized_users=""):
-        if not self.haveinfo:
-            self.retrieveInfo()
-
-        if self.dataset.type == 'Publish':
-            PubMedLink = ""
-            if self.pubmed_id:
-                PubMedLink = HT.Href(text="PubMed %d : " % self.pubmed_id,
-                target = "_blank", url = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id)
-            else:
-                PubMedLink = HT.Span("Unpublished : ", Class="fs15")
-
-            if formName:
-                setDescription2 = HT.Href(url="javascript:showDatabase3('%s','%s','%s','')" %
-                (formName, self.dataset.name, self.name), Class = "fs14")
-            else:
-                setDescription2 = HT.Href(url="javascript:showDatabase2('%s','%s','')" %
-                (self.dataset.name,self.name), Class = "fs14")
-
-            if self.confidential and not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=privilege, userName=userName, authorized_users=authorized_users):
-                setDescription2.append('RecordID/%s - %s' % (self.name, self.pre_publication_description))
-            else:
-                setDescription2.append('RecordID/%s - %s' % (self.name, self.post_publication_description))
-
-            #XZ 03/26/2011: Xiaodong comment out the following two lins as Rob asked. Need to check with Rob why in PublishXRef table, there are few row whose Sequence > 1.
-            #if self.sequence > 1:
-            #       setDescription2.append(' btach %d' % self.sequence)
-            if self.authors:
-                a1 = string.split(self.authors,',')[0]
-                while a1[0] == '"' or a1[0] == "'" :
-                    a1 = a1[1:]
-                setDescription2.append(' by ')
-                setDescription2.append(HT.Italic('%s, and colleagues' % a1))
-            setDescription = HT.Span(PubMedLink, setDescription2)
-
-        elif self.dataset.type == 'Temp':
-            setDescription = HT.Href(text="%s" % (self.description),url="javascript:showDatabase2\
-            ('%s','%s','')" % (self.dataset.name,self.name), Class = "fs14")
-            setDescription = HT.Span(setDescription)
-
-        elif self.dataset.type == 'Geno': # Genome DB only available for single search
-            if formName:
-                setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
-        '%2.3f' % self.mb),url="javascript:showDatabase3('%s','%s','%s','')" % \
-        (formName, self.dataset.name, self.name), Class = "fs14")
-            else:
-                setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
-        '%2.3f' % self.mb),url="javascript:showDatabase2('%s','%s','')" % \
-        (self.dataset.name,self.name), Class = "fs14")
-
-            setDescription = HT.Span(setDescription)
-
-        else:
-            if self.cellid:
-                if formName:
-                    setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name, self.cellid),url=\
-            "javascript:showDatabase3('%s','%s','%s','%s')" % (formName, self.dataset.name,self.name,self.cellid), \
-            Class = "fs14")
-                else:
-                    setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name,self.cellid),url=\
-            "javascript:showDatabase2('%s','%s','%s')" % (self.dataset.name,self.name,self.cellid), \
-            Class = "fs14")
-            else:
-                if formName:
-                    setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
-            "javascript:showDatabase3('%s','%s','%s','')" % (formName, self.dataset.name,self.name), \
-            Class = "fs14")
-                else:
-                    setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
-            "javascript:showDatabase2('%s','%s','')" % (self.dataset.name,self.name), \
-            Class = "fs14")
-            if self.symbol and self.chr and self.mb:
-                setDescription.append(' [')
-                setDescription.append(HT.Italic('%s' % self.symbol,Class="cdg fwb"))
-                setDescription.append(' on Chr %s @ %s Mb]' % (self.chr,self.mb))
-            if self.description:
-                setDescription.append(': %s' % self.description)
-            if self.probe_target_description:
-                setDescription.append('; %s' % self.probe_target_description)
-            setDescription = HT.Span(setDescription)
-
-        if self.dataset.type != 'Temp' and dispFromDatabase:
-            setDescription.append( ' --- FROM : ')
-            setDescription.append(self.dataset.genHTML(Class='cori'))
-        return setDescription
-
     @property
     def name_header_fmt(self):
         '''Return a human-readable name for use in page header'''
@@ -644,7 +207,7 @@ class GeneralTrait(object):
                 formatted = self.post_publication_description
         else:
             formatted = "Not available"
-        return formatted.capitalize()
+        return formatted
 
     @property
     def alias_fmt(self):
@@ -682,62 +245,48 @@ class GeneralTrait(object):
 
         return fmt
 
-
-    def get_database(self):
-        """
-        Returns the database, and the url referring to the database if it exists
-
-        We're going to to return two values here, and we don't want to have to call this twice from
-        the template. So it's not a property called from the template, but instead is called from the view
-
-        """
-        if self.cellid:
-            query = """ select ProbeFreeze.Name from ProbeFreeze, ProbeSetFreeze where
-                            ProbeFreeze.Id =
-                            ProbeSetFreeze.ProbeFreezeId AND
-                            ProbeSetFreeze.Id = %d""" % thisTrait.dataset.id
-            logger.sql(query)
-            probeDBName = g.db.execute(query).fetchone()[0]
-            return dict(name = probeDBName,
-                        url = None)
-        else:
-            return dict(name = self.dataset.fullname,
-                        url = webqtlConfig.INFOPAGEHREF % self.dataset.name)
-
-    def calculate_correlation(self, values, method):
-        """Calculate the correlation value and p value according to the method specified"""
-
-        #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for
-        #There's probably a better way of dealing with this, but I'll have to ask Christian
-        updated_raw_values = []
-        updated_values = []
-        for i in range(len(values)):
-            if values[i] != "None":
-                updated_raw_values.append(self.raw_values[i])
-                updated_values.append(values[i])
-
-        self.raw_values = updated_raw_values
-        values = updated_values
-
-        if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON:
-            corr, nOverlap = webqtlUtil.calCorrelation(self.raw_values, values, len(values))
-        else:
-            corr, nOverlap = webqtlUtil.calCorrelationRank(self.raw_values, values, len(values))
-
-        self.correlation = corr
-        self.overlap = nOverlap
-
-        if self.overlap < 3:
-            self.p_value = 1.0
-        else:
-            #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to br prevented.
-            if abs(self.correlation) >= 1.0:
-                self.p_value = 0.0
-            else:
-                ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
-                ZValue = ZValue*sqrt(self.overlap-3)
-                self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
-
+# In ProbeSet, there are maybe several annotations match one sequence
+# so we need use sequence(BlatSeq) as the identification, when we update
+# one annotation, we update the others who match the sequence also.
+#
+# Hongqiang Li, 3/3/2008
+def getSequence(trait, dataset_name):
+    dataset = create_dataset(dataset_name)
+     
+    if dataset.type == 'ProbeSet':
+        results = g.db.execute('''
+                       SELECT
+                               ProbeSet.BlatSeq
+                       FROM
+                               ProbeSet, ProbeSetFreeze, ProbeSetXRef
+                       WHERE
+                               ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+                               ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
+                               ProbeSet.Name = %s
+                               ProbeSetFreeze.Name = %s
+               ''', trait.name, dataset.name).fetchone()
+
+        return results[0]        
+        
+def retrieve_sample_data(trait, dataset, samplelist=None):
+    if samplelist == None:
+        samplelist = []
+
+    results = dataset.retrieve_sample_data(trait.name)
+
+    # Todo: is this necessary? If not remove
+    trait.data.clear()
+
+    all_samples_ordered = dataset.group.all_samples_ordered()
+
+    if results:
+        for item in results:
+            name, value, variance, num_cases, name2 = item
+            if not samplelist or (samplelist and name in samplelist):
+                trait.data[name] = webqtlCaseData(*item)   #name, value, variance, num_cases)
+                
+    return trait
+        
 def convert_location_to_value(chromosome, mb):
     try:
         location_value = int(chromosome)*1000 + float(mb)
@@ -765,3 +314,398 @@ def get_sample_data():
     #    jsonable_sample_data[sample] = trait_ob.data[sample].value
     #
     #return jsonable_sample_data
+    
+def jsonable(trait, dataset_name):
+    """Return a dict suitable for using as json
+
+    Actual turning into json doesn't happen here though"""
+
+    dataset = create_dataset(dataset_name)
+    
+    if dataset.type == "ProbeSet":
+        return dict(name=trait.name,
+                    symbol=trait.symbol,
+                    dataset=dataset.name,
+                    description=trait.description_display,
+                    mean=trait.mean,
+                    location=trait.location_repr,
+                    lrs_score=trait.LRS_score_repr,
+                    lrs_location=trait.LRS_location_repr,
+                    additive=trait.additive
+                    )
+    elif dataset.type == "Publish":
+        if trait.pubmed_id:
+            return dict(name=trait.name,
+                        dataset=dataset.name,
+                        description=trait.description_display,
+                        authors=trait.authors,
+                        pubmed_text=trait.pubmed_text,
+                        pubmed_link=trait.pubmed_link,
+                        lrs_score=trait.LRS_score_repr,
+                        lrs_location=trait.LRS_location_repr,
+                        additive=trait.additive
+                        )
+        else:
+            return dict(name=trait.name,
+                        dataset=dataset.name,
+                        description=trait.description_display,
+                        authors=trait.authors,
+                        pubmed_text=trait.pubmed_text,
+                        lrs_score=trait.LRS_score_repr,
+                        lrs_location=trait.LRS_location_repr,
+                        additive=trait.additive
+                        )
+    elif dataset.type == "Geno":
+        return dict(name=trait.name,
+                    dataset=dataset.name,
+                    location=trait.location_repr
+                    )
+    else:
+        return dict()
+
+def jsonable_table_row(trait, dataset_name, index):
+    """Return a list suitable for json and intended to be displayed in a table
+
+    Actual turning into json doesn't happen here though"""
+
+    dataset = create_dataset(dataset_name)
+    
+    if dataset.type == "ProbeSet":
+        if trait.mean == "":
+            mean = "N/A"
+        else:
+            mean = "%.3f" % round(float(trait.mean), 2)
+        if trait.additive == "":
+            additive = "N/A"
+        else:
+            additive = "%.3f" % round(float(trait.additive), 2)
+        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+                index,
+                '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+                trait.symbol,
+                trait.description_display,
+                trait.location_repr,
+                mean, 
+                trait.LRS_score_repr,
+                trait.LRS_location_repr,
+                additive]
+    elif dataset.type == "Publish":
+        if trait.additive == "":
+            additive = "N/A"
+        else:
+            additive = "%.2f" % round(float(trait.additive), 2)
+        if trait.pubmed_id:
+            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+                    index,
+                    '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+                    trait.description_display,
+                    trait.authors,
+                    '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>',
+                    trait.LRS_score_repr,
+                    trait.LRS_location_repr,
+                    additive]
+        else:
+            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+                    index,
+                    '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+                    trait.description_display,
+                    trait.authors,
+                    trait.pubmed_text,
+                    trait.LRS_score_repr,
+                    trait.LRS_location_repr,
+                    additive]
+    elif dataset.type == "Geno":
+        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+                index,
+                '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+                trait.location_repr]
+    else:
+        return dict()
+
+def retrieve_trait_info(trait, dataset, get_qtl_info=False):
+    assert dataset, "Dataset doesn't exist"
+    
+    if dataset.type == 'Publish':
+        query = """
+                SELECT
+                        PublishXRef.Id, Publication.PubMed_ID,
+                        Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
+                        Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
+                        Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
+                        Publication.Authors, Publication.Title, Publication.Abstract,
+                        Publication.Journal, Publication.Volume, Publication.Pages,
+                        Publication.Month, Publication.Year, PublishXRef.Sequence,
+                        Phenotype.Units, PublishXRef.comments
+                FROM
+                        PublishXRef, Publication, Phenotype, PublishFreeze
+                WHERE
+                        PublishXRef.Id = %s AND
+                        Phenotype.Id = PublishXRef.PhenotypeId AND
+                        Publication.Id = PublishXRef.PublicationId AND
+                        PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                        PublishFreeze.Id = %s
+                """ % (trait.name, dataset.id)
+
+        logger.sql(query)
+        trait_info = g.db.execute(query).fetchone()
+
+
+    #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
+    #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
+    elif dataset.type == 'ProbeSet':
+        display_fields_string = ', ProbeSet.'.join(dataset.display_fields)
+        display_fields_string = 'ProbeSet.' + display_fields_string
+        query = """
+                SELECT %s
+                FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+                WHERE
+                        ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                        ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                        ProbeSetFreeze.Name = '%s' AND
+                        ProbeSet.Name = '%s'
+                """ % (escape(display_fields_string),
+                       escape(dataset.name),
+                       escape(str(trait.name)))
+        logger.sql(query)
+        trait_info = g.db.execute(query).fetchone()
+    #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
+    # to avoid the problem of same marker name from different species.
+    elif dataset.type == 'Geno':
+        display_fields_string = string.join(dataset.display_fields,',Geno.')
+        display_fields_string = 'Geno.' + display_fields_string
+        query = """
+                SELECT %s
+                FROM Geno, GenoFreeze, GenoXRef
+                WHERE
+                        GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+                        GenoXRef.GenoId = Geno.Id AND
+                        GenoFreeze.Name = '%s' AND
+                        Geno.Name = '%s'
+                """ % (escape(display_fields_string),
+                       escape(dataset.name),
+                       escape(trait.name))
+        logger.sql(query)
+        trait_info = g.db.execute(query).fetchone()
+    else: #Temp type
+        query = """SELECT %s FROM %s WHERE Name = %s"""
+        logger.sql(query)
+        trait_info = g.db.execute(query,
+                                  (string.join(dataset.display_fields,','),
+                                               dataset.type, trait.name)).fetchone()
+    if trait_info:
+        trait.haveinfo = True
+
+        #XZ: assign SQL query result to trait attributes.
+        for i, field in enumerate(dataset.display_fields):
+            holder = trait_info[i]
+            if isinstance(trait_info[i], basestring):
+                holder = unicode(trait_info[i], "utf-8", "ignore")
+            setattr(trait, field, holder)
+
+        if dataset.type == 'Publish':
+            trait.confidential = 0
+            if trait.pre_publication_description and not trait.pubmed_id:
+                trait.confidential = 1
+
+            description = trait.post_publication_description
+
+            #If the dataset is confidential and the user has access to confidential
+            #phenotype traits, then display the pre-publication description instead
+            #of the post-publication description
+            if trait.confidential:
+                trait.description_display = trait.pre_publication_description
+
+                #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
+                #        privilege=self.dataset.privilege,
+                #        userName=self.dataset.userName,
+                #        authorized_users=self.authorized_users):
+                #
+                #    description = self.pre_publication_description
+            else:
+                if description:
+                    trait.description_display = description.strip()
+                else:
+                    trait.description_display = ""
+
+            if not trait.year.isdigit():
+                trait.pubmed_text = "N/A"
+            else:
+                trait.pubmed_text = trait.year
+
+            if trait.pubmed_id:
+                trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id
+
+
+        trait.homologeneid = None
+        if dataset.type == 'ProbeSet' and dataset.group:
+            if trait.geneid:
+                #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
+                #XZ: So I have to test if geneid is number before execute the query.
+                #XZ: The geneid values in database should be cleaned up.
+                #try:
+                #    float(self.geneid)
+                #    geneidIsNumber = True
+                #except ValueError:
+                #    geneidIsNumber = False
+                #if geneidIsNumber:
+                query = """
+                        SELECT
+                                HomologeneId
+                        FROM
+                                Homologene, Species, InbredSet
+                        WHERE
+                                Homologene.GeneId ='%s' AND
+                                InbredSet.Name = '%s' AND
+                                InbredSet.SpeciesId = Species.Id AND
+                                Species.TaxonomyId = Homologene.TaxonomyId
+                        """ % (escape(str(trait.geneid)), escape(dataset.group.name))
+                logger.sql(query)
+                result = g.db.execute(query).fetchone()
+                #else:
+                #    result = None
+
+                if result:
+                    trait.homologeneid = result[0]
+
+            description_string = unicode(str(trait.description).strip(codecs.BOM_UTF8), 'utf-8')
+            target_string = unicode(str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
+
+            if len(description_string) > 1 and description_string != 'None':
+                description_display = description_string
+            else:
+                description_display = trait.symbol
+
+            if (len(description_display) > 1 and description_display != 'N/A' and
+                    len(target_string) > 1 and target_string != 'None'):
+                description_display = description_display + '; ' + target_string.strip()
+
+            # Save it for the jinja2 template
+            trait.description_display = description_display
+
+            #XZ: trait_location_value is used for sorting
+            trait.location_repr = 'N/A'
+            trait.location_value = 1000000
+
+            if trait.chr and trait.mb:
+                #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+                #This is so we can convert the location to a number used for sorting
+                trait_location_value = convert_location_to_value(trait.chr, trait.mb)
+                 #try:
+                #    trait_location_value = int(self.chr)*1000 + self.mb
+                #except ValueError:
+                #    if self.chr.upper() == 'X':
+                #        trait_location_value = 20*1000 + self.mb
+                #    else:
+                #        trait_location_value = (ord(str(self.chr).upper()[0])*1000 +
+                #                               self.mb)
+
+                #ZS: Put this in function currently called "convert_location_to_value"
+                trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb))
+                trait.location_value = trait_location_value
+
+        elif dataset.type == "Geno":
+            trait.location_repr = 'N/A'
+            trait.location_value = 1000000
+
+            if trait.chr and trait.mb:
+                #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+                #This is so we can convert the location to a number used for sorting
+                trait_location_value = convert_location_to_value(trait.chr, trait.mb)
+
+                #ZS: Put this in function currently called "convert_location_to_value"
+                trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb))
+                trait.location_value = trait_location_value
+
+        if get_qtl_info:
+            #LRS and its location
+            trait.LRS_score_repr = "N/A"
+            trait.LRS_score_value = 0
+            trait.LRS_location_repr = "N/A"
+            trait.LRS_location_value = 1000000
+            if dataset.type == 'ProbeSet' and not trait.cellid:
+                query = """
+                        SELECT
+                                ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
+                        FROM
+                                ProbeSetXRef, ProbeSet
+                        WHERE
+                                ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                                ProbeSet.Name = "{}" AND
+                                ProbeSetXRef.ProbeSetFreezeId ={}
+                        """.format(trait.name, dataset.id)
+                logger.sql(query)
+                trait_qtl = g.db.execute(query).fetchone()
+                if trait_qtl:
+                    trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl
+                    if trait.locus:
+                        query = """
+                            select Geno.Chr, Geno.Mb from Geno, Species
+                            where Species.Name = '{}' and
+                            Geno.Name = '{}' and
+                            Geno.SpeciesId = Species.Id
+                            """.format(dataset.group.species, trait.locus)
+                        logger.sql(query)
+                        result = g.db.execute(query).fetchone()
+                        if result:
+                            trait.locus_chr = result[0]
+                            trait.locus_mb = result[1]
+                        else:
+                            trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+                    else:
+                        trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+                else:
+                    trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.mean = trait.additive = ""
+
+
+            if dataset.type == 'Publish':
+                query = """
+                        SELECT
+                                PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
+                        FROM
+                                PublishXRef, PublishFreeze
+                        WHERE
+                                PublishXRef.Id = %s AND
+                                PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                                PublishFreeze.Id =%s
+                """ % (trait.name, dataset.id)
+                logger.sql(query)
+                trait_qtl = g.db.execute(query).fetchone()
+                if trait_qtl:
+                    trait.locus, trait.lrs, trait.additive = trait_qtl
+                    if trait.locus:
+                        query = """
+                            select Geno.Chr, Geno.Mb from Geno, Species
+                            where Species.Name = '{}' and
+                            Geno.Name = '{}' and
+                            Geno.SpeciesId = Species.Id
+                            """.format(dataset.group.species, trait.locus)
+                        logger.sql(query)
+                        result = g.db.execute(query).fetchone()
+                        if result:
+                            trait.locus_chr = result[0]
+                            trait.locus_mb = result[1]
+                        else:
+                            trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+                    else:
+                        trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+                else:
+                    trait.locus = trait.lrs = trait.additive = ""
+
+            if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and trait.locus_chr != "" and trait.locus_mb != "":
+                #XZ: LRS_location_value is used for sorting
+                try:
+                    LRS_location_value = int(trait.locus_chr)*1000 + float(trait.locus_mb)
+                except:
+                    if trait.locus_chr.upper() == 'X':
+                        LRS_location_value = 20*1000 + float(trait.locus_mb)
+                    else:
+                        LRS_location_value = ord(str(trait.locus_chr).upper()[0])*1000 + float(trait.locus_mb)
+
+                trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (trait.locus_chr, float(trait.locus_mb))
+                if trait.lrs != "":
+                    trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs
+                    trait.LRS_score_value = LRS_score_value = trait.lrs
+    else:
+        raise KeyError, `trait.name`+' information is not found in the database.'
+        
+    return trait
\ No newline at end of file
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 2f88f778..845a7224 100644
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -44,15 +44,15 @@ class webqtlCaseData(object):
 
     def __repr__(self):
         str = "<webqtlCaseData> "
-        if self.value != None:
+        if self.value:
             str += "value=%2.3f" % self.value
-        if self.variance != None:
+        if self.variance:
             str += " variance=%2.3f" % self.variance
-        if self.num_cases != None:
+        if self.num_cases:
             str += " ndata=%d" % self.num_cases
-        if self.name != None:
+        if self.name:
             str += " name=%s" % self.name
-        if self.name2 != None:
+        if self.name2:
             str += " name2=%s" % self.name2
         return str