1 files changed, 1074 insertions, 0 deletions
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
new file mode 100755
index 00000000..ff1f8119
--- /dev/null
+++ b/web/webqtl/compareCorrelates/trait.py
@@ -0,0 +1,1074 @@
+#Trait.py
+#
+#--Individual functions are already annotated, more or less.
+#
+#Classes:
+#RawPoint
+#Trait
+#ProbeSetTrait
+#GenotypeTrait
+#PublishTrait
+#TempTrait
+#-KA
+
+# trait.py: a data structure to represent a trait
+import time
+import string
+
+CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
+
+# RawPoint: to store information about the relationship between two particular
+# traits
+# RawPoint represents directly the input file
+class RawPoint:
+    def __init__(self, i, j):
+        self.i = i
+        self.j = j
+        
+    def __eq__(self, other):
+        return (self.i == other.i and
+                self.j == other.j and
+                self.spearman == other.spearman and
+                self.pearson == other.pearson)
+    
+    def __str__(self):
+        return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
+
+def tdEscapeList(cols, align="left"):
+    """
+    A helper function used by tableRow
+    in Trait that will convert a list of strings into a set of
+    table cells enclosed by <td>%s</td> tags
+    """
+    html = ""
+    for col in cols:
+        html += '<td style="text-align: %s">%s</td>' % (align, col)
+    return html
+
+def thEscapeList(cols):
+    """
+    A helper function used by tableRowHeader
+    in Trait that will convert a list of strings into a set of
+    table cells enclosed by <td>%s</td> tags
+    """
+    html = ""
+    for col in cols:
+        html += "<th>%s</th>" % col
+    return html
+
+def commaEscapeList(cols):
+    """
+    A helper function used by csvHeader and csvRow.
+    Really it's just a wrapper for string.join
+    """
+    return '"' + string.join(cols, '","') + '"'
+
+
+class Trait:
+    """
+    A trait represents an attribute of an object. In the WebQTL database, traits are stored
+    as ProbeSets; that is, the average values of a set of probes are stored.
+    """
+    def __init__(self, id="", name="", description="", symbol="", href=""):
+        self.id  = id
+        self.name = name
+	self.dbName = ""
+        self.symbol = symbol
+        self.href = href
+        self.strainData = {}
+
+    def populateDataId(self, cursor, freezeId):
+        """
+        Retrieve the dataId for trait data corresponding to the given database
+        The way to do this depends on the particular type of trait, so we leave implementation
+        to subclasses.
+        """
+        raise NotImplementedError
+    
+    def populateStrainData(self, cursor):
+        """
+        Load this trait full of train data corresponding to the data id
+        The data id can either come from populateDataId
+        or can be set manually by the user of this class.
+        Xiaodong added: The way to do this depends on the particular type of trait,
+        so we leave implementation to subclasses.
+
+        """
+        raise NotImplementedError
+
+    def shortName(self):
+        """
+        To return a short name for this trait; this name should be
+        appropriate for a row or column title
+        """
+        return self.name
+
+    def nameNoDB(self):
+        """
+        To return only the short name without the database attached
+        """
+        strArray = self.shortName().split('::')
+        
+        return strArray[1]
+    
+    def datasetName(self):
+        """
+        To return only the name of the dataset
+        """
+        strArray = self.shortName().split('::')
+        
+        return strArray[0].strip()
+
+    def longName(self):
+        """
+        To return a long name for this trait; this name should be
+        appropriate for a key to a table
+        """
+        return self.shortName()
+
+    def __str__(self):
+        return self.shortName()
+
+    def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
+        """
+        tableRowHelper: (arrayof String) -. String
+        To generate a table row to represent this object, appending
+        the additional information in beforeCols and afterCols
+        to the beginning and the end
+        """
+        thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
+                                              self.name)
+        html = '<tr bgcolor="%s">' % color
+        html += tdEscapeList(beforeCols + thisRow)
+        html += tdEscapeList(afterCols, align="right")
+        html += "</tr>"
+        
+        return html
+
+
+    def header(self):
+        """
+        header: (listof String)
+        To generate a list of strings describing each piece of data
+        returned by row
+        """
+        raise NotImplementedError
+
+    def row(self):
+        """
+        row: (listof String)
+        To generate a list of strings describing this object. The
+        elements of this list should be described by header()
+        """
+        raise NotImplementedError
+    
+    def tableRowHeader(self, beforeCols, afterCols, color):
+        """
+        tableRowHeader: (arrayof String) -> (arrayof String) -> String
+        To generate a table row header to represent this object,
+        appending the additional information in beforeCols and
+        afterCols to the beginning and end
+        """
+        html = '<tr bgcolor="%s">' % color
+        html += thEscapeList(beforeCols + self.header() +
+                             afterCols)
+        html += "</tr>"
+        return html
+
+    def csvHeader(self, beforeCols, afterCols):
+        return commaEscapeList(beforeCols + self.header() + afterCols)
+    
+    def csvRow(self, beforeCols, afterCols):
+        return commaEscapeList(beforeCols + self.row() + afterCols)
+    
+        
+    def traitInfoLink(self):
+        """
+        To build a trait info link to show information about this
+        trait. We assume that the database attribute is properly set
+        on the hidden form on the page where this link will go.
+        """
+        return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
+
+# ProbeSetTrait: a trait with data from a probeset
+class ProbeSetTrait(Trait):
+    def __init__(self, id="", name="", description="", symbol="", href="",
+                 chromosome="", MB="", GeneId=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.description = description
+        self.symbol = symbol
+        self.chromosome = chromosome
+        self.MB = MB
+        self.GeneId = GeneId
+        
+    def populateDataId(self, cursor, freezeId):
+        """
+        Look up the data id for this trait given which
+        freeze it came from.
+        """
+        cursor.execute('''
+        SELECT
+          ProbeSetXRef.DataId
+        FROM
+          ProbeSetXRef
+        WHERE
+          ProbeSetId = %s AND
+          ProbeSetFreezeId = %s
+        ''' % (self.id, freezeId))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          ProbeSetData.StrainId,
+          ProbeSetData.value
+        FROM
+          ProbeSetData
+        WHERE
+          ProbeSetData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+
+    def shortName(self):
+        """
+        An improved string method that uses the gene symbol where
+        we have it
+        """
+        if self.symbol != "":
+            return self.symbol
+        else:
+            return Trait.shortName(self)
+
+    def longName(self):
+        """
+        We use several bits of genetic information to give
+        useful information about this trait and where it is
+        """
+        if self.chromosome != "":
+            chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
+                                                     self.chromosome,
+                                                     self.MB)
+        else:
+            chrPart = ""
+
+        return "%s%s: %s" % (self.name, chrPart, self.description)
+
+    def header(self):
+        return ["Name", "Symbol", "Description",
+                "Chr", "Position (Mb)"]
+
+    def row(self):
+        if type(self.MB) is float:
+            MB = "%.2f" % self.MB
+        else:
+            MB = ""
+            
+        return [self.name, self.symbol, self.description,
+                self.chromosome, MB]
+    
+    def tableRow(self, beforeCols, afterCols, color):
+        """
+        tableRow: (arrayof String) -> (arrayof String) -> String
+        To generate a table row to represent this object, appending
+        the additional information in beforeCols and afterCols to the
+        beginning and end
+        """
+        thisRow = self.row()
+
+        # trim description
+        if len(thisRow[2]) > 20:
+            thisRow[2] = thisRow[2][:20] + "..."
+
+        # add NCBI info link 
+        thisRow[1] = self.ncbiInfoLink()
+
+        return self.tableRowHelper(beforeCols, afterCols, color,
+                                   thisRow)
+
+
+    def ncbiInfoLink(self):
+        """
+        ncbiInfoLink :: String
+        To generate an NCBI info link for this trait. If we have a GeneId,
+        then we can go straight to the gene. If not, then we generate a search
+        link based on the gene symbol. If we have none of them, then we don't
+        generate a link at all.
+        """
+        if self.GeneId != "":
+            cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
+        elif self.symbol != "":
+            cmd = "cmd=Search&term=%s" % self.symbol
+        else:
+            return ""
+
+        return '''
+        <a target="_new"
+           href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
+           %s</a> ''' % (cmd, self.symbol)
+
+
+# GenotypeTrait: a trait with data from the genotype
+class GenotypeTrait(Trait):
+    def __init__(self, id="", name="", href="", chromosome="", MB=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.chromosome = chromosome
+        self.MB = MB
+
+    def populateDataId(self, cursor, freezeId):
+        """
+        Look up the data id for this trait from the
+        genotype.
+        """
+        cursor.execute('''
+        SELECT
+          GenoXRef.DataId
+        FROM
+          GenoXRef
+        WHERE
+          GenoId = %s AND
+          GenoFreezeId = %s
+        ''' % (self.id, freezeId))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          GenoData.StrainId,
+          GenoData.value
+        FROM
+          GenoData
+        WHERE
+          GenoData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+    def header(self):
+        return ["Locus", "Chr", "Position (Mb)"]
+
+    def row(self):
+        return [self.name, self.chromosome, "%.3f" % self.MB]
+
+    def tableRow(self, beforeCols, afterCols, color):
+        return self.tableRowHelper(beforeCols, afterCols, color, self.row())
+
+# PublishTrait: a trait with data from publications
+class PublishTrait(Trait):
+    def __init__(self, id="", name="", href="", authors="", title="",
+                 phenotype="", year=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.authors = authors
+        self.title = title
+        self.phenotype = phenotype
+        self.year = year
+
+    def populateDataId(self, cursor, freezeId):
+        """
+        Look up the data id for this trait from the
+        published set. For the moment, we assume that there's
+        only one publish freeze.
+        """
+        cursor.execute('''
+        SELECT
+          PublishXRef.DataId
+        FROM
+          PublishXRef, PublishFreeze
+        WHERE
+          PublishFreeze.Id = %s AND 
+          PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+          PublishXRef.Id = %s 
+        ''' % (freezeId, self.id))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          PublishData.StrainId,
+          PublishData.value
+        FROM
+          PublishData
+        WHERE
+          PublishData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+
+    def longName(self):
+        """
+        A more intelligent string function that uses
+        information about the publication from which this trait came
+        """
+        return "%s: %s by %s" % (self.name, self.title, self.authors)
+
+    def header(self):
+        return ["Record", "Phenotype", "Authors", "Year", "URL"]
+
+    def row(self):
+        return [self.name,
+                self.phenotype,
+                self.authors,
+                str(self.year),
+                ""]
+    
+    def tableRow(self, beforeCols, afterCols, color):
+        """
+        tableRow: (arrayof String) -> (arrayof String) ->  String
+        To generate a table row to represent this object, appending
+        the additional information in beforeCols and afterCols to the
+        beginning and end
+        """
+        thisRow = self.row()
+
+        # for multiple authors, use "et. al" after first two
+        authors = thisRow[2].split(",")
+        if len(authors) > 2:
+            thisRow[2] = string.join(authors[:2], ",") + ", et al"
+
+        # clip phenotype to 20 chars
+        if len(thisRow[1]) > 20:
+            thisRow[1] = thisRow[1][:20] + "..."
+
+        # add Pub Med URL
+        thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
+
+        return self.tableRowHelper(beforeCols, afterCols, color,
+                                   thisRow)
+
+
+# TempTrait: a trait with data generate by user and stored in temp table
+class TempTrait(Trait):
+    def __init__(self, id="", name="", href="", description=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.description = description
+
+    def populateDataId(self, cursor, freeezeId):
+        """
+        Look up the data id for this trait from the Temp table, freezeId isn't used, 
+        it just for fixing the inherit
+        """
+        cursor.execute('''
+        SELECT
+          DataId
+        FROM
+          Temp
+        WHERE
+          Id=%s
+        ''' % (self.id))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          TempData.StrainId,
+          TempData.value
+        FROM
+          TempData
+        WHERE
+          TempData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+
+    def row(self):
+        return [self.id,
+                self.name,
+                self.description,
+                ""]
+    
+
+    def longName(self):
+        """
+        For temp trait, the description always contents whole useful information
+        """
+        return self.description
+
+
+# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
+def queryGenotypeTraitByName(cursor, speciesId, name):
+    qry = '''
+    SELECT
+      Geno.Id,
+      Geno.Name,
+      Geno.Chr,
+      Geno.Mb
+    FROM
+      Geno
+    WHERE
+      Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
+
+    cursor.execute(qry)
+    row = cursor.fetchone()
+    return GenotypeTrait(id=row[0], name=row[1],
+                         chromosome=row[2], MB=row[3])
+
+# queryPublishTraitByName : Cursor -> string -> PublishTrait
+def queryPublishTraitByName(cursor, freezeId, name):
+    qry = '''
+    SELECT
+      PublishXRef.Id,
+      Phenotype.Id,
+      Publication.Authors,
+      Publication.Title,
+      Publication.Year,
+      Publication.PubMed_ID
+    FROM
+      Publication, PublishXRef, Phenotype, PublishFreeze
+    WHERE
+      PublishFreeze.Id = %s AND 
+      PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+      PublishXRef.Id = %s AND 
+      PublishXRef.PublicationId = Publication.Id AND
+      PublishXRef.PhenotypeId = Phenotype.Id 
+      ''' % (freezeId, name)
+
+    cursor.execute(qry)
+    if cursor.rowcount == 0:
+        return None
+    else:
+        row = cursor.fetchone()
+        
+        return PublishTrait(id=row[0], name='%s'%row[0],
+                            authors=row[2], title=row[3],
+                            year=row[4], href=row[5])
+
+
+def queryTempTraitByName(cursor, name):
+    name=name.strip()
+    qry = '''
+    SELECT
+      Temp.Id,
+      Temp.Name,
+      Temp.description
+    FROM
+      Temp
+    WHERE
+      Temp.Name= "%s"
+      ''' % (name)
+
+    cursor.execute(qry)
+    if cursor.rowcount == 0:
+        return None
+    else:
+        row = cursor.fetchone()
+        return TempTrait(id=row[0], name=row[1], description=row[2], href='')
+
+# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
+# to retrieve an entire probeset fully populated with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+#XZ, 03/04/2009: It seems to me that this function is never be executed.
+#XZ: Although it can be called from multitrait.loadDatabase,
+#XZ: but the loadDatabase function will not be called
+#XZ: if the targetDatabaseType is probeset.
+#XZ: The probeset traits of target database are retrieved by execute
+#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
+def queryPopulatedProbeSetTraits(cursor, freezeId):
+    step1 = time.time()
+    traits = queryProbeSetTraits(cursor, freezeId)
+    traitDict = {}
+    for trait in traits:
+        traitDict[trait.id] = trait
+        
+    step2 = time.time()
+    print 
+    #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+    cursor.execute('''
+    SELECT
+      ProbeSetXRef.ProbeSetId,
+      ProbeSetData.StrainId,
+      ProbeSetData.value
+    FROM
+      ProbeSetXRef
+    Left Join ProbeSetData ON
+      ProbeSetXRef.DataId = ProbeSetData.Id
+    WHERE
+      ProbeSetXRef.ProbeSetFreezeId = %s
+    ''' % freezeId)
+
+    step3 = time.time()
+    totalrows = 0
+    somerows = cursor.fetchmany(1000)
+    while len(somerows) > 0:
+        totalrows += len(somerows)
+        for row in somerows:
+            # this line of code can execute more than one million times
+            traitDict[row[0]].strainData[int(row[1])] = row[2]
+        somerows = cursor.fetchmany(1000)
+
+    #cursor.close()
+    step4 = time.time()
+    
+    time1 = step2 - step1
+    time2 = step3 - step2
+    time3 = step4 - step3
+    time4 = step4 - step1
+    #print "%f %f %f %f %d rows" % (round(time1, 2),
+    #                               round(time2, 2),
+    #                               round(time3, 2),
+    #                               round(time4, 2),
+    #                               totalrows)
+    #print "Fetched %d traits" % len(traits)
+    return traits
+
+
+# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
+# to retrieve probeset fully populated whose ProbeSetId in a range
+# a special ProbeSetId with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+    step1 = time.time()
+    traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
+    traitDict = {}
+    for trait in traits:
+        traitDict[trait.id] = trait
+
+    step2 = time.time()
+    print
+    #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+    cursor.execute('''
+    SELECT
+      ProbeSetXRef.ProbeSetId,
+      ProbeSetData.StrainId,
+      ProbeSetData.value
+    FROM
+      ProbeSetXRef
+    Left Join ProbeSetData ON
+      ProbeSetXRef.DataId = ProbeSetData.Id
+    WHERE
+      ProbeSetXRef.ProbeSetFreezeId = %s AND
+      ProbeSetXRef.ProbeSetId >= %s AND
+      ProbeSetXRef.ProbeSetId <= %s
+      ''' % (freezeId, ProbeSetId1, ProbeSetId2))
+
+    step3 = time.time()
+    totalrows = 0
+    somerows = cursor.fetchmany(1000)
+    while len(somerows) > 0:
+        totalrows += len(somerows)
+        for row in somerows:
+            # this line of code can execute more than one million times
+            traitDict[row[0]].strainData[int(row[1])] = row[2]
+        somerows = cursor.fetchmany(1000)
+
+    #cursor.close()
+    step4 = time.time()
+
+    time1 = step2 - step1
+    time2 = step3 - step2
+    time3 = step4 - step3
+    time4 = step4 - step1
+    #print "%f %f %f %f %d rows" % (round(time1, 2),
+    #                               round(time2, 2),
+    #                               round(time3, 2),
+    #                               round(time4, 2),
+    #                               totalrows)
+    #print "Fetched %d traits" % len(traits)
+    return traits
+
+
+# def noneFilter : string or none -> string
+# to replace a possible None by an empty string
+def noneFilter(x):
+    if x is None:
+        return ""
+    else:
+        return x
+
+# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits(cursor, freezeId):
+    """
+    To locate all of the traits in a particular probeset
+    """
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSet.symbol,
+      ProbeSet.Chr,
+      ProbeSet.Mb,
+      ProbeSet.GeneId,
+      ProbeSetXRef.DataId
+    FROM
+      ProbeSet,
+      ProbeSetXRef
+    WHERE
+      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+      ProbeSetXRef.ProbeSetFreezeId = %s
+    ORDER BY ProbeSet.Id
+    ''' % freezeId
+
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+
+    for row in rows:
+        row = map(noneFilter, row)
+        trait = ProbeSetTrait(id=row[0], name=row[1],
+                              description=row[2],
+                              chromosome=row[4],
+                              MB=row[5],
+                              symbol=row[3],
+                              GeneId=row[6])
+        trait.dataId = row[7]
+        traits.append(trait)
+
+    return traits
+
+
+# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+    """
+    To locate all of the traits in a particular probeset
+    """
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSet.symbol,
+      ProbeSet.Chr,
+      ProbeSet.Mb,
+      ProbeSet.GeneId,
+      ProbeSetXRef.DataId
+    FROM
+      ProbeSet,
+      ProbeSetXRef
+    WHERE
+      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+      ProbeSetXRef.ProbeSetFreezeId = %s AND
+      ProbeSet.Id >= %s AND
+      ProbeSet.Id <= %s
+    ORDER BY ProbeSet.Id
+    ''' % (freezeId, ProbeSetId1, ProbeSetId2)
+
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+
+    for row in rows:
+        row = map(noneFilter, row)
+        trait = ProbeSetTrait(id=row[0], name=row[1],
+                              description=row[2],
+                              chromosome=row[4],
+                              MB=row[5],
+                              symbol=row[3],
+                              GeneId=row[6])
+        trait.dataId = row[7]
+        traits.append(trait)
+
+    return traits
+
+
+# queryPublishTraits : Cursor -> arrayof Trait
+def queryPublishTraits(cursor, freezeId):
+    """
+    To locate all published traits
+    """
+    qry = '''
+    SELECT
+      Publication.Id,
+      Publication.Name,
+      Publication.PhenoType,
+      Publication.Authors,
+      Publication.Title,
+      Publication.Year,
+      Publication.PubMed_ID,
+      PublishXRef.DataId
+    FROM
+      Publication,
+      PublishXRef
+    WHERE
+      PublishXRef.PublishFreezeId = %s AND
+      PublishXRef.PublishId = Publication.Id
+    ''' % freezeId
+    
+    qry = '''
+    SELECT
+      Publication.Id,
+      PublishXRef.Id,
+      Phenotype.Pre_publication_description,
+      Phenotype.Post_publication_description,
+      Publication.Authors,
+      Publication.Title,
+      Publication.Year,
+      Publication.PubMed_ID,
+      PublishXRef.DataId
+    FROM
+      Publication, PublishXRef, Phenotype, PublishFreeze
+    WHERE
+      PublishFreeze.Id = %s AND 
+      PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+      PublishXRef.PublicationId = Publication.Id AND
+      PublishXRef.PhenotypeId = Phenotype.Id 
+      ''' % freezeId
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+    for row in rows:
+        PhenotypeString = row[3]
+        if not row[7] and row[2]:
+            PhenotypeString = row[2]
+        trait = PublishTrait(id=row[0], name= '%s' %row[1],
+                             phenotype=PhenotypeString,
+                             authors=row[4],
+                             title=row[5],
+                             year=row[6],
+                             href=row[7])
+        trait.dataId = row[8]
+        traits.append(trait)
+        
+    return traits
+
+# queryGenotypeTraits : Cursor -> arrayof Trait
+def queryGenotypeTraits(cursor, freezeId):
+    """
+    To locate all traits in the genotype
+    """
+    qry =    '''
+    SELECT
+      Geno.Id,
+      Geno.Name,
+      Geno.Chr,
+      GenoXRef.DataId,
+      Geno.Mb
+    FROM
+      Geno,
+      GenoXRef
+    WHERE
+      GenoXRef.GenoId = Geno.Id
+      AND GenoXRef.GenoFreezeId = %s
+    ''' % freezeId
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+    
+    for row in rows:
+        trait = GenotypeTrait(id=row[0], name=row[1],
+                              chromosome=row[2], MB=row[4])
+        trait.dataId = row[3]
+        traits.append(trait)
+        
+    return traits
+
+# queryProbeSetTraitByName : Cursor -> string -> Trait
+# to find a particular trait given its name 
+def queryProbeSetTraitByName(cursor, name):
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSet.symbol,
+      ProbeSet.Chr,
+      ProbeSet.Mb,
+      ProbeSet.GeneId
+    FROM
+      ProbeSet
+    WHERE
+      ProbeSet.Name = "%s"
+    ''' % name
+    #print qry
+    cursor.execute(qry)
+    row = cursor.fetchone()
+
+    # convert a MySQL NULL value to an empty string
+    # for gene symbol
+    if row[3] is None:
+        sym = ""
+    else:
+        sym = row[3]
+        
+    return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
+                         symbol=sym, chromosome=row[4], MB=row[5],
+                         GeneId=row[6])
+    
+                     
+# queryTraits : Cursor -> string -> string -> arrayof Traits
+# to find all of the traits whose descriptions match a certain string in a
+# particular database
+def queryTraits(cursor, dbId, queryString):
+    # we do this in two steps:
+    # first we get the data id for the matching traits
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSetXRef.DataId
+    FROM
+      ProbeSet,
+      ProbeSetXRef
+    WHERE
+      ProbeSetXRef.ProbeSetFreezeId = %s AND
+      ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
+      ProbeSet.description LIKE "%%%s%%"
+    ''' % (dbId, queryString)
+    #    print qry
+    cursor.execute(qry)
+
+    if cursor.rowcount == 0:
+        print "No traits found"
+        return []
+    else:
+        print "%s traits found" % (cursor.rowcount)
+
+    # maybe fetchall is bad; we will see
+    traits = []
+    for row in cursor.fetchall():
+        myTrait = Trait(row[0], row[1], row[2])
+        myTrait.dataId = row[3]
+        traits.append(myTrait)
+
+    # second we pull all of the strain data for each trait
+    print "Retrieving individual trait data..."
+    for trait in traits:
+        trait.populateStrainData(cursor, trait.dataId)
+        print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
+
+    print "done"
+    return traits
+
+# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
+# to return the short and long name for each ProbeSetFreeze
+# this function is designed specifically for building
+# a database selector
+def queryProbeSetFreezes(cursor):
+    cursor.execute("""
+    SELECT
+      ProbeSetFreeze.Name,
+      ProbeSetFreeze.FullName
+    FROM
+      ProbeSetFreeze
+    ORDER BY
+      ProbeSetFreeze.Name
+    """)
+
+    # for now, fetchall returns the data as a list of tuples
+    # which is what we want
+    return list(cursor.fetchall())
+
+# queryProbeSetFreezeIdName: Cursor -> String -> String, String
+# this function returns the
+# id and the long name of a probesetfreeze given its name
+# once again, it's designed specifically for building
+# the database selector
+def queryProbeSetFreezeIdName(cursor, name):
+    qry = ('''
+    SELECT
+      ProbeSetFreeze.Id,
+      ProbeSetFreeze.FullName
+    FROM
+      ProbeSetFreeze
+    WHERE
+      ProbeSetFreeze.Name = "%s" 
+    ''' % name)
+    #print qry
+    cursor.execute(qry)
+
+    row = cursor.fetchone()
+    return row
+
+# queryProbeSetFreezeName: Cursor -> String -> String
+# to return the name of a probe set freeze given its id
+def queryProbeSetFreezeName(cursor, id):
+    cursor.execute('''
+    SELECT
+      ProbeSetFreeze.FullName
+    FROM
+      ProbeSetFreeze
+    WHERE
+      ProbeSetFreeze.Id = %s
+    ''' % id)
+
+    row = cursor.fetchone()
+    return row[0]
+
+# dbNameToTypeId : Cursor -> String -> (String, String)
+# to convert a database name to a (type, id) pair
+def dbNameToTypeId(cursor, name):
+    types = ["ProbeSet", "Geno", "Publish"]
+    for type_ in types:
+        count = cursor.execute('''
+        SELECT
+          %sFreeze.Id
+        FROM
+          %sFreeze
+        WHERE
+          Name = "%s"
+        ''' % (type_, type_,  name))
+
+        if count != 0:
+            id = cursor.fetchone()[0]
+            return type_, id
+
+    return None, None
+
+# dbTypeIdToName : Cursor -> String -> String -> String
+# to convert a database (type,id) pair into a name
+def dbTypeIdToName(cursor, dbType, dbId):
+    cursor.execute('''
+    SELECT
+      %sFreeze.Name
+    FROM
+      %sFreeze
+    WHERE
+    Id = %s
+    ''' % (dbType, dbType, dbId))
+    
+    row = cursor.fetchone()
+    return row[0]
+
+#XZ, July 21, 2010: I add this function
+def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
+    cursor.execute('''
+    SELECT
+      SpeciesId
+    FROM
+      InbredSet, %sFreeze
+    WHERE
+    %sFreeze.Id = %s
+    and InbredSetId = InbredSet.Id
+    ''' % (dbType, dbType, dbId))
+
+    row = cursor.fetchone()
+    return row[0]
+
+
+# queryStrainCount : Cursor -> int
+# return the number of strains in the database
+def queryStrainCount(cursor):
+    cursor.execute('''
+    SELECT
+      Max(Strain.Id)
+    FROM
+      Strain
+    ''')
+    return (cursor.fetchone())[0]