aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/compareCorrelates/trait.py
diff options
context:
space:
mode:
authorroot2012-05-08 18:39:56 -0500
committerroot2012-05-08 18:39:56 -0500
commitea46f42ee640928b92947bfb204c41a482d80937 (patch)
tree9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/compareCorrelates/trait.py
parent056b5253fc3857b0444382aa39944f6344dc1ceb (diff)
downloadgenenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz
Add all the source codes into the github.
Diffstat (limited to 'web/webqtl/compareCorrelates/trait.py')
-rwxr-xr-xweb/webqtl/compareCorrelates/trait.py1074
1 files changed, 1074 insertions, 0 deletions
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
new file mode 100755
index 00000000..ff1f8119
--- /dev/null
+++ b/web/webqtl/compareCorrelates/trait.py
@@ -0,0 +1,1074 @@
+#Trait.py
+#
+#--Individual functions are already annotated, more or less.
+#
+#Classes:
+#RawPoint
+#Trait
+#ProbeSetTrait
+#GenotypeTrait
+#PublishTrait
+#TempTrait
+#-KA
+
+# trait.py: a data structure to represent a trait
+import time
+import string
+
+CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
+
+# RawPoint: to store information about the relationship between two particular
+# traits
+# RawPoint represents directly the input file
+class RawPoint:
+ def __init__(self, i, j):
+ self.i = i
+ self.j = j
+
+ def __eq__(self, other):
+ return (self.i == other.i and
+ self.j == other.j and
+ self.spearman == other.spearman and
+ self.pearson == other.pearson)
+
+ def __str__(self):
+ return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
+
+def tdEscapeList(cols, align="left"):
+ """
+ A helper function used by tableRow
+ in Trait that will convert a list of strings into a set of
+ table cells enclosed by <td>%s</td> tags
+ """
+ html = ""
+ for col in cols:
+ html += '<td style="text-align: %s">%s</td>' % (align, col)
+ return html
+
+def thEscapeList(cols):
+ """
+ A helper function used by tableRowHeader
+ in Trait that will convert a list of strings into a set of
+ table cells enclosed by <td>%s</td> tags
+ """
+ html = ""
+ for col in cols:
+ html += "<th>%s</th>" % col
+ return html
+
+def commaEscapeList(cols):
+ """
+ A helper function used by csvHeader and csvRow.
+ Really it's just a wrapper for string.join
+ """
+ return '"' + string.join(cols, '","') + '"'
+
+
+class Trait:
+ """
+ A trait represents an attribute of an object. In the WebQTL database, traits are stored
+ as ProbeSets; that is, the average values of a set of probes are stored.
+ """
+ def __init__(self, id="", name="", description="", symbol="", href=""):
+ self.id = id
+ self.name = name
+ self.dbName = ""
+ self.symbol = symbol
+ self.href = href
+ self.strainData = {}
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Retrieve the dataId for trait data corresponding to the given database
+ The way to do this depends on the particular type of trait, so we leave implementation
+ to subclasses.
+ """
+ raise NotImplementedError
+
+ def populateStrainData(self, cursor):
+ """
+ Load this trait full of train data corresponding to the data id
+ The data id can either come from populateDataId
+ or can be set manually by the user of this class.
+ Xiaodong added: The way to do this depends on the particular type of trait,
+ so we leave implementation to subclasses.
+
+ """
+ raise NotImplementedError
+
+ def shortName(self):
+ """
+ To return a short name for this trait; this name should be
+ appropriate for a row or column title
+ """
+ return self.name
+
+ def nameNoDB(self):
+ """
+ To return only the short name without the database attached
+ """
+ strArray = self.shortName().split('::')
+
+ return strArray[1]
+
+ def datasetName(self):
+ """
+ To return only the name of the dataset
+ """
+ strArray = self.shortName().split('::')
+
+ return strArray[0].strip()
+
+ def longName(self):
+ """
+ To return a long name for this trait; this name should be
+ appropriate for a key to a table
+ """
+ return self.shortName()
+
+ def __str__(self):
+ return self.shortName()
+
+ def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
+ """
+ tableRowHelper: (arrayof String) -. String
+ To generate a table row to represent this object, appending
+ the additional information in beforeCols and afterCols
+ to the beginning and the end
+ """
+ thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
+ self.name)
+ html = '<tr bgcolor="%s">' % color
+ html += tdEscapeList(beforeCols + thisRow)
+ html += tdEscapeList(afterCols, align="right")
+ html += "</tr>"
+
+ return html
+
+
+ def header(self):
+ """
+ header: (listof String)
+ To generate a list of strings describing each piece of data
+ returned by row
+ """
+ raise NotImplementedError
+
+ def row(self):
+ """
+ row: (listof String)
+ To generate a list of strings describing this object. The
+ elements of this list should be described by header()
+ """
+ raise NotImplementedError
+
+ def tableRowHeader(self, beforeCols, afterCols, color):
+ """
+ tableRowHeader: (arrayof String) -> (arrayof String) -> String
+ To generate a table row header to represent this object,
+ appending the additional information in beforeCols and
+ afterCols to the beginning and end
+ """
+ html = '<tr bgcolor="%s">' % color
+ html += thEscapeList(beforeCols + self.header() +
+ afterCols)
+ html += "</tr>"
+ return html
+
+ def csvHeader(self, beforeCols, afterCols):
+ return commaEscapeList(beforeCols + self.header() + afterCols)
+
+ def csvRow(self, beforeCols, afterCols):
+ return commaEscapeList(beforeCols + self.row() + afterCols)
+
+
+ def traitInfoLink(self):
+ """
+ To build a trait info link to show information about this
+ trait. We assume that the database attribute is properly set
+ on the hidden form on the page where this link will go.
+ """
+ return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
+
+# ProbeSetTrait: a trait with data from a probeset
+class ProbeSetTrait(Trait):
+ def __init__(self, id="", name="", description="", symbol="", href="",
+ chromosome="", MB="", GeneId=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.description = description
+ self.symbol = symbol
+ self.chromosome = chromosome
+ self.MB = MB
+ self.GeneId = GeneId
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Look up the data id for this trait given which
+ freeze it came from.
+ """
+ cursor.execute('''
+ SELECT
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSetXRef
+ WHERE
+ ProbeSetId = %s AND
+ ProbeSetFreezeId = %s
+ ''' % (self.id, freezeId))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ ProbeSetData.StrainId,
+ ProbeSetData.value
+ FROM
+ ProbeSetData
+ WHERE
+ ProbeSetData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+
+ def shortName(self):
+ """
+ An improved string method that uses the gene symbol where
+ we have it
+ """
+ if self.symbol != "":
+ return self.symbol
+ else:
+ return Trait.shortName(self)
+
+ def longName(self):
+ """
+ We use several bits of genetic information to give
+ useful information about this trait and where it is
+ """
+ if self.chromosome != "":
+ chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
+ self.chromosome,
+ self.MB)
+ else:
+ chrPart = ""
+
+ return "%s%s: %s" % (self.name, chrPart, self.description)
+
+ def header(self):
+ return ["Name", "Symbol", "Description",
+ "Chr", "Position (Mb)"]
+
+ def row(self):
+ if type(self.MB) is float:
+ MB = "%.2f" % self.MB
+ else:
+ MB = ""
+
+ return [self.name, self.symbol, self.description,
+ self.chromosome, MB]
+
+ def tableRow(self, beforeCols, afterCols, color):
+ """
+ tableRow: (arrayof String) -> (arrayof String) -> String
+ To generate a table row to represent this object, appending
+ the additional information in beforeCols and afterCols to the
+ beginning and end
+ """
+ thisRow = self.row()
+
+ # trim description
+ if len(thisRow[2]) > 20:
+ thisRow[2] = thisRow[2][:20] + "..."
+
+ # add NCBI info link
+ thisRow[1] = self.ncbiInfoLink()
+
+ return self.tableRowHelper(beforeCols, afterCols, color,
+ thisRow)
+
+
+ def ncbiInfoLink(self):
+ """
+ ncbiInfoLink :: String
+ To generate an NCBI info link for this trait. If we have a GeneId,
+ then we can go straight to the gene. If not, then we generate a search
+ link based on the gene symbol. If we have none of them, then we don't
+ generate a link at all.
+ """
+ if self.GeneId != "":
+ cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
+ elif self.symbol != "":
+ cmd = "cmd=Search&term=%s" % self.symbol
+ else:
+ return ""
+
+ return '''
+ <a target="_new"
+ href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
+ %s</a> ''' % (cmd, self.symbol)
+
+
+# GenotypeTrait: a trait with data from the genotype
+class GenotypeTrait(Trait):
+ def __init__(self, id="", name="", href="", chromosome="", MB=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.chromosome = chromosome
+ self.MB = MB
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Look up the data id for this trait from the
+ genotype.
+ """
+ cursor.execute('''
+ SELECT
+ GenoXRef.DataId
+ FROM
+ GenoXRef
+ WHERE
+ GenoId = %s AND
+ GenoFreezeId = %s
+ ''' % (self.id, freezeId))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ GenoData.StrainId,
+ GenoData.value
+ FROM
+ GenoData
+ WHERE
+ GenoData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+ def header(self):
+ return ["Locus", "Chr", "Position (Mb)"]
+
+ def row(self):
+ return [self.name, self.chromosome, "%.3f" % self.MB]
+
+ def tableRow(self, beforeCols, afterCols, color):
+ return self.tableRowHelper(beforeCols, afterCols, color, self.row())
+
+# PublishTrait: a trait with data from publications
+class PublishTrait(Trait):
+ def __init__(self, id="", name="", href="", authors="", title="",
+ phenotype="", year=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.authors = authors
+ self.title = title
+ self.phenotype = phenotype
+ self.year = year
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Look up the data id for this trait from the
+ published set. For the moment, we assume that there's
+ only one publish freeze.
+ """
+ cursor.execute('''
+ SELECT
+ PublishXRef.DataId
+ FROM
+ PublishXRef, PublishFreeze
+ WHERE
+ PublishFreeze.Id = %s AND
+ PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+ PublishXRef.Id = %s
+ ''' % (freezeId, self.id))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ PublishData.StrainId,
+ PublishData.value
+ FROM
+ PublishData
+ WHERE
+ PublishData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+
+ def longName(self):
+ """
+ A more intelligent string function that uses
+ information about the publication from which this trait came
+ """
+ return "%s: %s by %s" % (self.name, self.title, self.authors)
+
+ def header(self):
+ return ["Record", "Phenotype", "Authors", "Year", "URL"]
+
+ def row(self):
+ return [self.name,
+ self.phenotype,
+ self.authors,
+ str(self.year),
+ ""]
+
+ def tableRow(self, beforeCols, afterCols, color):
+ """
+ tableRow: (arrayof String) -> (arrayof String) -> String
+ To generate a table row to represent this object, appending
+ the additional information in beforeCols and afterCols to the
+ beginning and end
+ """
+ thisRow = self.row()
+
+ # for multiple authors, use "et. al" after first two
+ authors = thisRow[2].split(",")
+ if len(authors) > 2:
+ thisRow[2] = string.join(authors[:2], ",") + ", et al"
+
+ # clip phenotype to 20 chars
+ if len(thisRow[1]) > 20:
+ thisRow[1] = thisRow[1][:20] + "..."
+
+ # add Pub Med URL
+ thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
+
+ return self.tableRowHelper(beforeCols, afterCols, color,
+ thisRow)
+
+
+# TempTrait: a trait with data generate by user and stored in temp table
+class TempTrait(Trait):
+ def __init__(self, id="", name="", href="", description=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.description = description
+
+ def populateDataId(self, cursor, freeezeId):
+ """
+ Look up the data id for this trait from the Temp table, freezeId isn't used,
+ it just for fixing the inherit
+ """
+ cursor.execute('''
+ SELECT
+ DataId
+ FROM
+ Temp
+ WHERE
+ Id=%s
+ ''' % (self.id))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ TempData.StrainId,
+ TempData.value
+ FROM
+ TempData
+ WHERE
+ TempData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+
+ def row(self):
+ return [self.id,
+ self.name,
+ self.description,
+ ""]
+
+
+ def longName(self):
+ """
+ For temp trait, the description always contents whole useful information
+ """
+ return self.description
+
+
+# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
+def queryGenotypeTraitByName(cursor, speciesId, name):
+ qry = '''
+ SELECT
+ Geno.Id,
+ Geno.Name,
+ Geno.Chr,
+ Geno.Mb
+ FROM
+ Geno
+ WHERE
+ Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
+
+ cursor.execute(qry)
+ row = cursor.fetchone()
+ return GenotypeTrait(id=row[0], name=row[1],
+ chromosome=row[2], MB=row[3])
+
+# queryPublishTraitByName : Cursor -> string -> PublishTrait
+def queryPublishTraitByName(cursor, freezeId, name):
+ qry = '''
+ SELECT
+ PublishXRef.Id,
+ Phenotype.Id,
+ Publication.Authors,
+ Publication.Title,
+ Publication.Year,
+ Publication.PubMed_ID
+ FROM
+ Publication, PublishXRef, Phenotype, PublishFreeze
+ WHERE
+ PublishFreeze.Id = %s AND
+ PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+ PublishXRef.Id = %s AND
+ PublishXRef.PublicationId = Publication.Id AND
+ PublishXRef.PhenotypeId = Phenotype.Id
+ ''' % (freezeId, name)
+
+ cursor.execute(qry)
+ if cursor.rowcount == 0:
+ return None
+ else:
+ row = cursor.fetchone()
+
+ return PublishTrait(id=row[0], name='%s'%row[0],
+ authors=row[2], title=row[3],
+ year=row[4], href=row[5])
+
+
+def queryTempTraitByName(cursor, name):
+ name=name.strip()
+ qry = '''
+ SELECT
+ Temp.Id,
+ Temp.Name,
+ Temp.description
+ FROM
+ Temp
+ WHERE
+ Temp.Name= "%s"
+ ''' % (name)
+
+ cursor.execute(qry)
+ if cursor.rowcount == 0:
+ return None
+ else:
+ row = cursor.fetchone()
+ return TempTrait(id=row[0], name=row[1], description=row[2], href='')
+
+# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
+# to retrieve an entire probeset fully populated with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+#XZ, 03/04/2009: It seems to me that this function is never be executed.
+#XZ: Although it can be called from multitrait.loadDatabase,
+#XZ: but the loadDatabase function will not be called
+#XZ: if the targetDatabaseType is probeset.
+#XZ: The probeset traits of target database are retrieved by execute
+#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
+def queryPopulatedProbeSetTraits(cursor, freezeId):
+ step1 = time.time()
+ traits = queryProbeSetTraits(cursor, freezeId)
+ traitDict = {}
+ for trait in traits:
+ traitDict[trait.id] = trait
+
+ step2 = time.time()
+ print
+ #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+ cursor.execute('''
+ SELECT
+ ProbeSetXRef.ProbeSetId,
+ ProbeSetData.StrainId,
+ ProbeSetData.value
+ FROM
+ ProbeSetXRef
+ Left Join ProbeSetData ON
+ ProbeSetXRef.DataId = ProbeSetData.Id
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = %s
+ ''' % freezeId)
+
+ step3 = time.time()
+ totalrows = 0
+ somerows = cursor.fetchmany(1000)
+ while len(somerows) > 0:
+ totalrows += len(somerows)
+ for row in somerows:
+ # this line of code can execute more than one million times
+ traitDict[row[0]].strainData[int(row[1])] = row[2]
+ somerows = cursor.fetchmany(1000)
+
+ #cursor.close()
+ step4 = time.time()
+
+ time1 = step2 - step1
+ time2 = step3 - step2
+ time3 = step4 - step3
+ time4 = step4 - step1
+ #print "%f %f %f %f %d rows" % (round(time1, 2),
+ # round(time2, 2),
+ # round(time3, 2),
+ # round(time4, 2),
+ # totalrows)
+ #print "Fetched %d traits" % len(traits)
+ return traits
+
+
+# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
+# to retrieve probeset fully populated whose ProbeSetId in a range
+# a special ProbeSetId with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+ step1 = time.time()
+ traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
+ traitDict = {}
+ for trait in traits:
+ traitDict[trait.id] = trait
+
+ step2 = time.time()
+ print
+ #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+ cursor.execute('''
+ SELECT
+ ProbeSetXRef.ProbeSetId,
+ ProbeSetData.StrainId,
+ ProbeSetData.value
+ FROM
+ ProbeSetXRef
+ Left Join ProbeSetData ON
+ ProbeSetXRef.DataId = ProbeSetData.Id
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = %s AND
+ ProbeSetXRef.ProbeSetId >= %s AND
+ ProbeSetXRef.ProbeSetId <= %s
+ ''' % (freezeId, ProbeSetId1, ProbeSetId2))
+
+ step3 = time.time()
+ totalrows = 0
+ somerows = cursor.fetchmany(1000)
+ while len(somerows) > 0:
+ totalrows += len(somerows)
+ for row in somerows:
+ # this line of code can execute more than one million times
+ traitDict[row[0]].strainData[int(row[1])] = row[2]
+ somerows = cursor.fetchmany(1000)
+
+ #cursor.close()
+ step4 = time.time()
+
+ time1 = step2 - step1
+ time2 = step3 - step2
+ time3 = step4 - step3
+ time4 = step4 - step1
+ #print "%f %f %f %f %d rows" % (round(time1, 2),
+ # round(time2, 2),
+ # round(time3, 2),
+ # round(time4, 2),
+ # totalrows)
+ #print "Fetched %d traits" % len(traits)
+ return traits
+
+
+# def noneFilter : string or none -> string
+# to replace a possible None by an empty string
+def noneFilter(x):
+ if x is None:
+ return ""
+ else:
+ return x
+
+# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits(cursor, freezeId):
+ """
+ To locate all of the traits in a particular probeset
+ """
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ ProbeSet.Chr,
+ ProbeSet.Mb,
+ ProbeSet.GeneId,
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSet,
+ ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetXRef.ProbeSetFreezeId = %s
+ ORDER BY ProbeSet.Id
+ ''' % freezeId
+
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+
+ for row in rows:
+ row = map(noneFilter, row)
+ trait = ProbeSetTrait(id=row[0], name=row[1],
+ description=row[2],
+ chromosome=row[4],
+ MB=row[5],
+ symbol=row[3],
+ GeneId=row[6])
+ trait.dataId = row[7]
+ traits.append(trait)
+
+ return traits
+
+
+# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+ """
+ To locate all of the traits in a particular probeset
+ """
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ ProbeSet.Chr,
+ ProbeSet.Mb,
+ ProbeSet.GeneId,
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSet,
+ ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetXRef.ProbeSetFreezeId = %s AND
+ ProbeSet.Id >= %s AND
+ ProbeSet.Id <= %s
+ ORDER BY ProbeSet.Id
+ ''' % (freezeId, ProbeSetId1, ProbeSetId2)
+
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+
+ for row in rows:
+ row = map(noneFilter, row)
+ trait = ProbeSetTrait(id=row[0], name=row[1],
+ description=row[2],
+ chromosome=row[4],
+ MB=row[5],
+ symbol=row[3],
+ GeneId=row[6])
+ trait.dataId = row[7]
+ traits.append(trait)
+
+ return traits
+
+
+# queryPublishTraits : Cursor -> arrayof Trait
+def queryPublishTraits(cursor, freezeId):
+ """
+ To locate all published traits
+ """
+ qry = '''
+ SELECT
+ Publication.Id,
+ Publication.Name,
+ Publication.PhenoType,
+ Publication.Authors,
+ Publication.Title,
+ Publication.Year,
+ Publication.PubMed_ID,
+ PublishXRef.DataId
+ FROM
+ Publication,
+ PublishXRef
+ WHERE
+ PublishXRef.PublishFreezeId = %s AND
+ PublishXRef.PublishId = Publication.Id
+ ''' % freezeId
+
+ qry = '''
+ SELECT
+ Publication.Id,
+ PublishXRef.Id,
+ Phenotype.Pre_publication_description,
+ Phenotype.Post_publication_description,
+ Publication.Authors,
+ Publication.Title,
+ Publication.Year,
+ Publication.PubMed_ID,
+ PublishXRef.DataId
+ FROM
+ Publication, PublishXRef, Phenotype, PublishFreeze
+ WHERE
+ PublishFreeze.Id = %s AND
+ PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+ PublishXRef.PublicationId = Publication.Id AND
+ PublishXRef.PhenotypeId = Phenotype.Id
+ ''' % freezeId
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+ for row in rows:
+ PhenotypeString = row[3]
+ if not row[7] and row[2]:
+ PhenotypeString = row[2]
+ trait = PublishTrait(id=row[0], name= '%s' %row[1],
+ phenotype=PhenotypeString,
+ authors=row[4],
+ title=row[5],
+ year=row[6],
+ href=row[7])
+ trait.dataId = row[8]
+ traits.append(trait)
+
+ return traits
+
+# queryGenotypeTraits : Cursor -> arrayof Trait
+def queryGenotypeTraits(cursor, freezeId):
+ """
+ To locate all traits in the genotype
+ """
+ qry = '''
+ SELECT
+ Geno.Id,
+ Geno.Name,
+ Geno.Chr,
+ GenoXRef.DataId,
+ Geno.Mb
+ FROM
+ Geno,
+ GenoXRef
+ WHERE
+ GenoXRef.GenoId = Geno.Id
+ AND GenoXRef.GenoFreezeId = %s
+ ''' % freezeId
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+
+ for row in rows:
+ trait = GenotypeTrait(id=row[0], name=row[1],
+ chromosome=row[2], MB=row[4])
+ trait.dataId = row[3]
+ traits.append(trait)
+
+ return traits
+
+# queryProbeSetTraitByName : Cursor -> string -> Trait
+# to find a particular trait given its name
+def queryProbeSetTraitByName(cursor, name):
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ ProbeSet.Chr,
+ ProbeSet.Mb,
+ ProbeSet.GeneId
+ FROM
+ ProbeSet
+ WHERE
+ ProbeSet.Name = "%s"
+ ''' % name
+ #print qry
+ cursor.execute(qry)
+ row = cursor.fetchone()
+
+ # convert a MySQL NULL value to an empty string
+ # for gene symbol
+ if row[3] is None:
+ sym = ""
+ else:
+ sym = row[3]
+
+ return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
+ symbol=sym, chromosome=row[4], MB=row[5],
+ GeneId=row[6])
+
+
+# queryTraits : Cursor -> string -> string -> arrayof Traits
+# to find all of the traits whose descriptions match a certain string in a
+# particular database
+def queryTraits(cursor, dbId, queryString):
+ # we do this in two steps:
+ # first we get the data id for the matching traits
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSet,
+ ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = %s AND
+ ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
+ ProbeSet.description LIKE "%%%s%%"
+ ''' % (dbId, queryString)
+ # print qry
+ cursor.execute(qry)
+
+ if cursor.rowcount == 0:
+ print "No traits found"
+ return []
+ else:
+ print "%s traits found" % (cursor.rowcount)
+
+ # maybe fetchall is bad; we will see
+ traits = []
+ for row in cursor.fetchall():
+ myTrait = Trait(row[0], row[1], row[2])
+ myTrait.dataId = row[3]
+ traits.append(myTrait)
+
+ # second we pull all of the strain data for each trait
+ print "Retrieving individual trait data..."
+ for trait in traits:
+ trait.populateStrainData(cursor, trait.dataId)
+ print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
+
+ print "done"
+ return traits
+
+# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
+# to return the short and long name for each ProbeSetFreeze
+# this function is designed specifically for building
+# a database selector
+def queryProbeSetFreezes(cursor):
+ cursor.execute("""
+ SELECT
+ ProbeSetFreeze.Name,
+ ProbeSetFreeze.FullName
+ FROM
+ ProbeSetFreeze
+ ORDER BY
+ ProbeSetFreeze.Name
+ """)
+
+ # for now, fetchall returns the data as a list of tuples
+ # which is what we want
+ return list(cursor.fetchall())
+
+# queryProbeSetFreezeIdName: Cursor -> String -> String, String
+# this function returns the
+# id and the long name of a probesetfreeze given its name
+# once again, it's designed specifically for building
+# the database selector
+def queryProbeSetFreezeIdName(cursor, name):
+ qry = ('''
+ SELECT
+ ProbeSetFreeze.Id,
+ ProbeSetFreeze.FullName
+ FROM
+ ProbeSetFreeze
+ WHERE
+ ProbeSetFreeze.Name = "%s"
+ ''' % name)
+ #print qry
+ cursor.execute(qry)
+
+ row = cursor.fetchone()
+ return row
+
+# queryProbeSetFreezeName: Cursor -> String -> String
+# to return the name of a probe set freeze given its id
+def queryProbeSetFreezeName(cursor, id):
+ cursor.execute('''
+ SELECT
+ ProbeSetFreeze.FullName
+ FROM
+ ProbeSetFreeze
+ WHERE
+ ProbeSetFreeze.Id = %s
+ ''' % id)
+
+ row = cursor.fetchone()
+ return row[0]
+
+# dbNameToTypeId : Cursor -> String -> (String, String)
+# to convert a database name to a (type, id) pair
+def dbNameToTypeId(cursor, name):
+ types = ["ProbeSet", "Geno", "Publish"]
+ for type_ in types:
+ count = cursor.execute('''
+ SELECT
+ %sFreeze.Id
+ FROM
+ %sFreeze
+ WHERE
+ Name = "%s"
+ ''' % (type_, type_, name))
+
+ if count != 0:
+ id = cursor.fetchone()[0]
+ return type_, id
+
+ return None, None
+
+# dbTypeIdToName : Cursor -> String -> String -> String
+# to convert a database (type,id) pair into a name
+def dbTypeIdToName(cursor, dbType, dbId):
+ cursor.execute('''
+ SELECT
+ %sFreeze.Name
+ FROM
+ %sFreeze
+ WHERE
+ Id = %s
+ ''' % (dbType, dbType, dbId))
+
+ row = cursor.fetchone()
+ return row[0]
+
+#XZ, July 21, 2010: I add this function
+def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
+ cursor.execute('''
+ SELECT
+ SpeciesId
+ FROM
+ InbredSet, %sFreeze
+ WHERE
+ %sFreeze.Id = %s
+ and InbredSetId = InbredSet.Id
+ ''' % (dbType, dbType, dbId))
+
+ row = cursor.fetchone()
+ return row[0]
+
+
+# queryStrainCount : Cursor -> int
+# return the number of strains in the database
+def queryStrainCount(cursor):
+ cursor.execute('''
+ SELECT
+ Max(Strain.Id)
+ FROM
+ Strain
+ ''')
+ return (cursor.fetchone())[0]