aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/compareCorrelates/trait.py
diff options
context:
space:
mode:
Diffstat (limited to 'web/webqtl/compareCorrelates/trait.py')
-rwxr-xr-xweb/webqtl/compareCorrelates/trait.py1074
1 files changed, 0 insertions, 1074 deletions
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
deleted file mode 100755
index ff1f8119..00000000
--- a/web/webqtl/compareCorrelates/trait.py
+++ /dev/null
@@ -1,1074 +0,0 @@
-#Trait.py
-#
-#--Individual functions are already annotated, more or less.
-#
-#Classes:
-#RawPoint
-#Trait
-#ProbeSetTrait
-#GenotypeTrait
-#PublishTrait
-#TempTrait
-#-KA
-
-# trait.py: a data structure to represent a trait
-import time
-import string
-
-CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
-
-# RawPoint: to store information about the relationship between two particular
-# traits
-# RawPoint represents directly the input file
-class RawPoint:
- def __init__(self, i, j):
- self.i = i
- self.j = j
-
- def __eq__(self, other):
- return (self.i == other.i and
- self.j == other.j and
- self.spearman == other.spearman and
- self.pearson == other.pearson)
-
- def __str__(self):
- return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
-
-def tdEscapeList(cols, align="left"):
- """
- A helper function used by tableRow
- in Trait that will convert a list of strings into a set of
- table cells enclosed by <td>%s</td> tags
- """
- html = ""
- for col in cols:
- html += '<td style="text-align: %s">%s</td>' % (align, col)
- return html
-
-def thEscapeList(cols):
- """
- A helper function used by tableRowHeader
- in Trait that will convert a list of strings into a set of
- table cells enclosed by <td>%s</td> tags
- """
- html = ""
- for col in cols:
- html += "<th>%s</th>" % col
- return html
-
-def commaEscapeList(cols):
- """
- A helper function used by csvHeader and csvRow.
- Really it's just a wrapper for string.join
- """
- return '"' + string.join(cols, '","') + '"'
-
-
-class Trait:
- """
- A trait represents an attribute of an object. In the WebQTL database, traits are stored
- as ProbeSets; that is, the average values of a set of probes are stored.
- """
- def __init__(self, id="", name="", description="", symbol="", href=""):
- self.id = id
- self.name = name
- self.dbName = ""
- self.symbol = symbol
- self.href = href
- self.strainData = {}
-
- def populateDataId(self, cursor, freezeId):
- """
- Retrieve the dataId for trait data corresponding to the given database
- The way to do this depends on the particular type of trait, so we leave implementation
- to subclasses.
- """
- raise NotImplementedError
-
- def populateStrainData(self, cursor):
- """
- Load this trait full of train data corresponding to the data id
- The data id can either come from populateDataId
- or can be set manually by the user of this class.
- Xiaodong added: The way to do this depends on the particular type of trait,
- so we leave implementation to subclasses.
-
- """
- raise NotImplementedError
-
- def shortName(self):
- """
- To return a short name for this trait; this name should be
- appropriate for a row or column title
- """
- return self.name
-
- def nameNoDB(self):
- """
- To return only the short name without the database attached
- """
- strArray = self.shortName().split('::')
-
- return strArray[1]
-
- def datasetName(self):
- """
- To return only the name of the dataset
- """
- strArray = self.shortName().split('::')
-
- return strArray[0].strip()
-
- def longName(self):
- """
- To return a long name for this trait; this name should be
- appropriate for a key to a table
- """
- return self.shortName()
-
- def __str__(self):
- return self.shortName()
-
- def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
- """
- tableRowHelper: (arrayof String) -. String
- To generate a table row to represent this object, appending
- the additional information in beforeCols and afterCols
- to the beginning and the end
- """
- thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
- self.name)
- html = '<tr bgcolor="%s">' % color
- html += tdEscapeList(beforeCols + thisRow)
- html += tdEscapeList(afterCols, align="right")
- html += "</tr>"
-
- return html
-
-
- def header(self):
- """
- header: (listof String)
- To generate a list of strings describing each piece of data
- returned by row
- """
- raise NotImplementedError
-
- def row(self):
- """
- row: (listof String)
- To generate a list of strings describing this object. The
- elements of this list should be described by header()
- """
- raise NotImplementedError
-
- def tableRowHeader(self, beforeCols, afterCols, color):
- """
- tableRowHeader: (arrayof String) -> (arrayof String) -> String
- To generate a table row header to represent this object,
- appending the additional information in beforeCols and
- afterCols to the beginning and end
- """
- html = '<tr bgcolor="%s">' % color
- html += thEscapeList(beforeCols + self.header() +
- afterCols)
- html += "</tr>"
- return html
-
- def csvHeader(self, beforeCols, afterCols):
- return commaEscapeList(beforeCols + self.header() + afterCols)
-
- def csvRow(self, beforeCols, afterCols):
- return commaEscapeList(beforeCols + self.row() + afterCols)
-
-
- def traitInfoLink(self):
- """
- To build a trait info link to show information about this
- trait. We assume that the database attribute is properly set
- on the hidden form on the page where this link will go.
- """
- return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
-
-# ProbeSetTrait: a trait with data from a probeset
-class ProbeSetTrait(Trait):
- def __init__(self, id="", name="", description="", symbol="", href="",
- chromosome="", MB="", GeneId=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.description = description
- self.symbol = symbol
- self.chromosome = chromosome
- self.MB = MB
- self.GeneId = GeneId
-
- def populateDataId(self, cursor, freezeId):
- """
- Look up the data id for this trait given which
- freeze it came from.
- """
- cursor.execute('''
- SELECT
- ProbeSetXRef.DataId
- FROM
- ProbeSetXRef
- WHERE
- ProbeSetId = %s AND
- ProbeSetFreezeId = %s
- ''' % (self.id, freezeId))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- ProbeSetData.StrainId,
- ProbeSetData.value
- FROM
- ProbeSetData
- WHERE
- ProbeSetData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
-
- def shortName(self):
- """
- An improved string method that uses the gene symbol where
- we have it
- """
- if self.symbol != "":
- return self.symbol
- else:
- return Trait.shortName(self)
-
- def longName(self):
- """
- We use several bits of genetic information to give
- useful information about this trait and where it is
- """
- if self.chromosome != "":
- chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
- self.chromosome,
- self.MB)
- else:
- chrPart = ""
-
- return "%s%s: %s" % (self.name, chrPart, self.description)
-
- def header(self):
- return ["Name", "Symbol", "Description",
- "Chr", "Position (Mb)"]
-
- def row(self):
- if type(self.MB) is float:
- MB = "%.2f" % self.MB
- else:
- MB = ""
-
- return [self.name, self.symbol, self.description,
- self.chromosome, MB]
-
- def tableRow(self, beforeCols, afterCols, color):
- """
- tableRow: (arrayof String) -> (arrayof String) -> String
- To generate a table row to represent this object, appending
- the additional information in beforeCols and afterCols to the
- beginning and end
- """
- thisRow = self.row()
-
- # trim description
- if len(thisRow[2]) > 20:
- thisRow[2] = thisRow[2][:20] + "..."
-
- # add NCBI info link
- thisRow[1] = self.ncbiInfoLink()
-
- return self.tableRowHelper(beforeCols, afterCols, color,
- thisRow)
-
-
- def ncbiInfoLink(self):
- """
- ncbiInfoLink :: String
- To generate an NCBI info link for this trait. If we have a GeneId,
- then we can go straight to the gene. If not, then we generate a search
- link based on the gene symbol. If we have none of them, then we don't
- generate a link at all.
- """
- if self.GeneId != "":
- cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
- elif self.symbol != "":
- cmd = "cmd=Search&term=%s" % self.symbol
- else:
- return ""
-
- return '''
- <a target="_new"
- href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
- %s</a> ''' % (cmd, self.symbol)
-
-
-# GenotypeTrait: a trait with data from the genotype
-class GenotypeTrait(Trait):
- def __init__(self, id="", name="", href="", chromosome="", MB=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.chromosome = chromosome
- self.MB = MB
-
- def populateDataId(self, cursor, freezeId):
- """
- Look up the data id for this trait from the
- genotype.
- """
- cursor.execute('''
- SELECT
- GenoXRef.DataId
- FROM
- GenoXRef
- WHERE
- GenoId = %s AND
- GenoFreezeId = %s
- ''' % (self.id, freezeId))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- GenoData.StrainId,
- GenoData.value
- FROM
- GenoData
- WHERE
- GenoData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
- def header(self):
- return ["Locus", "Chr", "Position (Mb)"]
-
- def row(self):
- return [self.name, self.chromosome, "%.3f" % self.MB]
-
- def tableRow(self, beforeCols, afterCols, color):
- return self.tableRowHelper(beforeCols, afterCols, color, self.row())
-
-# PublishTrait: a trait with data from publications
-class PublishTrait(Trait):
- def __init__(self, id="", name="", href="", authors="", title="",
- phenotype="", year=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.authors = authors
- self.title = title
- self.phenotype = phenotype
- self.year = year
-
- def populateDataId(self, cursor, freezeId):
- """
- Look up the data id for this trait from the
- published set. For the moment, we assume that there's
- only one publish freeze.
- """
- cursor.execute('''
- SELECT
- PublishXRef.DataId
- FROM
- PublishXRef, PublishFreeze
- WHERE
- PublishFreeze.Id = %s AND
- PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
- PublishXRef.Id = %s
- ''' % (freezeId, self.id))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- PublishData.StrainId,
- PublishData.value
- FROM
- PublishData
- WHERE
- PublishData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
-
- def longName(self):
- """
- A more intelligent string function that uses
- information about the publication from which this trait came
- """
- return "%s: %s by %s" % (self.name, self.title, self.authors)
-
- def header(self):
- return ["Record", "Phenotype", "Authors", "Year", "URL"]
-
- def row(self):
- return [self.name,
- self.phenotype,
- self.authors,
- str(self.year),
- ""]
-
- def tableRow(self, beforeCols, afterCols, color):
- """
- tableRow: (arrayof String) -> (arrayof String) -> String
- To generate a table row to represent this object, appending
- the additional information in beforeCols and afterCols to the
- beginning and end
- """
- thisRow = self.row()
-
- # for multiple authors, use "et. al" after first two
- authors = thisRow[2].split(",")
- if len(authors) > 2:
- thisRow[2] = string.join(authors[:2], ",") + ", et al"
-
- # clip phenotype to 20 chars
- if len(thisRow[1]) > 20:
- thisRow[1] = thisRow[1][:20] + "..."
-
- # add Pub Med URL
- thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
-
- return self.tableRowHelper(beforeCols, afterCols, color,
- thisRow)
-
-
-# TempTrait: a trait with data generate by user and stored in temp table
-class TempTrait(Trait):
- def __init__(self, id="", name="", href="", description=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.description = description
-
- def populateDataId(self, cursor, freeezeId):
- """
- Look up the data id for this trait from the Temp table, freezeId isn't used,
- it just for fixing the inherit
- """
- cursor.execute('''
- SELECT
- DataId
- FROM
- Temp
- WHERE
- Id=%s
- ''' % (self.id))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- TempData.StrainId,
- TempData.value
- FROM
- TempData
- WHERE
- TempData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
-
- def row(self):
- return [self.id,
- self.name,
- self.description,
- ""]
-
-
- def longName(self):
- """
- For temp trait, the description always contents whole useful information
- """
- return self.description
-
-
-# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
-def queryGenotypeTraitByName(cursor, speciesId, name):
- qry = '''
- SELECT
- Geno.Id,
- Geno.Name,
- Geno.Chr,
- Geno.Mb
- FROM
- Geno
- WHERE
- Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
-
- cursor.execute(qry)
- row = cursor.fetchone()
- return GenotypeTrait(id=row[0], name=row[1],
- chromosome=row[2], MB=row[3])
-
-# queryPublishTraitByName : Cursor -> string -> PublishTrait
-def queryPublishTraitByName(cursor, freezeId, name):
- qry = '''
- SELECT
- PublishXRef.Id,
- Phenotype.Id,
- Publication.Authors,
- Publication.Title,
- Publication.Year,
- Publication.PubMed_ID
- FROM
- Publication, PublishXRef, Phenotype, PublishFreeze
- WHERE
- PublishFreeze.Id = %s AND
- PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
- PublishXRef.Id = %s AND
- PublishXRef.PublicationId = Publication.Id AND
- PublishXRef.PhenotypeId = Phenotype.Id
- ''' % (freezeId, name)
-
- cursor.execute(qry)
- if cursor.rowcount == 0:
- return None
- else:
- row = cursor.fetchone()
-
- return PublishTrait(id=row[0], name='%s'%row[0],
- authors=row[2], title=row[3],
- year=row[4], href=row[5])
-
-
-def queryTempTraitByName(cursor, name):
- name=name.strip()
- qry = '''
- SELECT
- Temp.Id,
- Temp.Name,
- Temp.description
- FROM
- Temp
- WHERE
- Temp.Name= "%s"
- ''' % (name)
-
- cursor.execute(qry)
- if cursor.rowcount == 0:
- return None
- else:
- row = cursor.fetchone()
- return TempTrait(id=row[0], name=row[1], description=row[2], href='')
-
-# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
-# to retrieve an entire probeset fully populated with data
-# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
-# a million records)
-# so we ask for the data in bulk
-#
-# cursor should be SSCursor for MySQL so rows are stored on the server side
-# and tuples are used
-# we explicitly close the cursor here as well
-#XZ, 03/04/2009: It seems to me that this function is never be executed.
-#XZ: Although it can be called from multitrait.loadDatabase,
-#XZ: but the loadDatabase function will not be called
-#XZ: if the targetDatabaseType is probeset.
-#XZ: The probeset traits of target database are retrieved by execute
-#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
-def queryPopulatedProbeSetTraits(cursor, freezeId):
- step1 = time.time()
- traits = queryProbeSetTraits(cursor, freezeId)
- traitDict = {}
- for trait in traits:
- traitDict[trait.id] = trait
-
- step2 = time.time()
- print
- #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
- cursor.execute('''
- SELECT
- ProbeSetXRef.ProbeSetId,
- ProbeSetData.StrainId,
- ProbeSetData.value
- FROM
- ProbeSetXRef
- Left Join ProbeSetData ON
- ProbeSetXRef.DataId = ProbeSetData.Id
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = %s
- ''' % freezeId)
-
- step3 = time.time()
- totalrows = 0
- somerows = cursor.fetchmany(1000)
- while len(somerows) > 0:
- totalrows += len(somerows)
- for row in somerows:
- # this line of code can execute more than one million times
- traitDict[row[0]].strainData[int(row[1])] = row[2]
- somerows = cursor.fetchmany(1000)
-
- #cursor.close()
- step4 = time.time()
-
- time1 = step2 - step1
- time2 = step3 - step2
- time3 = step4 - step3
- time4 = step4 - step1
- #print "%f %f %f %f %d rows" % (round(time1, 2),
- # round(time2, 2),
- # round(time3, 2),
- # round(time4, 2),
- # totalrows)
- #print "Fetched %d traits" % len(traits)
- return traits
-
-
-# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
-# to retrieve probeset fully populated whose ProbeSetId in a range
-# a special ProbeSetId with data
-# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
-# a million records)
-# so we ask for the data in bulk
-#
-# cursor should be SSCursor for MySQL so rows are stored on the server side
-# and tuples are used
-# we explicitly close the cursor here as well
-def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
- step1 = time.time()
- traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
- traitDict = {}
- for trait in traits:
- traitDict[trait.id] = trait
-
- step2 = time.time()
- print
- #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
- cursor.execute('''
- SELECT
- ProbeSetXRef.ProbeSetId,
- ProbeSetData.StrainId,
- ProbeSetData.value
- FROM
- ProbeSetXRef
- Left Join ProbeSetData ON
- ProbeSetXRef.DataId = ProbeSetData.Id
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = %s AND
- ProbeSetXRef.ProbeSetId >= %s AND
- ProbeSetXRef.ProbeSetId <= %s
- ''' % (freezeId, ProbeSetId1, ProbeSetId2))
-
- step3 = time.time()
- totalrows = 0
- somerows = cursor.fetchmany(1000)
- while len(somerows) > 0:
- totalrows += len(somerows)
- for row in somerows:
- # this line of code can execute more than one million times
- traitDict[row[0]].strainData[int(row[1])] = row[2]
- somerows = cursor.fetchmany(1000)
-
- #cursor.close()
- step4 = time.time()
-
- time1 = step2 - step1
- time2 = step3 - step2
- time3 = step4 - step3
- time4 = step4 - step1
- #print "%f %f %f %f %d rows" % (round(time1, 2),
- # round(time2, 2),
- # round(time3, 2),
- # round(time4, 2),
- # totalrows)
- #print "Fetched %d traits" % len(traits)
- return traits
-
-
-# def noneFilter : string or none -> string
-# to replace a possible None by an empty string
-def noneFilter(x):
- if x is None:
- return ""
- else:
- return x
-
-# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
-def queryProbeSetTraits(cursor, freezeId):
- """
- To locate all of the traits in a particular probeset
- """
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- ProbeSet.Chr,
- ProbeSet.Mb,
- ProbeSet.GeneId,
- ProbeSetXRef.DataId
- FROM
- ProbeSet,
- ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = %s
- ORDER BY ProbeSet.Id
- ''' % freezeId
-
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
-
- for row in rows:
- row = map(noneFilter, row)
- trait = ProbeSetTrait(id=row[0], name=row[1],
- description=row[2],
- chromosome=row[4],
- MB=row[5],
- symbol=row[3],
- GeneId=row[6])
- trait.dataId = row[7]
- traits.append(trait)
-
- return traits
-
-
-# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
-def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
- """
- To locate all of the traits in a particular probeset
- """
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- ProbeSet.Chr,
- ProbeSet.Mb,
- ProbeSet.GeneId,
- ProbeSetXRef.DataId
- FROM
- ProbeSet,
- ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = %s AND
- ProbeSet.Id >= %s AND
- ProbeSet.Id <= %s
- ORDER BY ProbeSet.Id
- ''' % (freezeId, ProbeSetId1, ProbeSetId2)
-
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
-
- for row in rows:
- row = map(noneFilter, row)
- trait = ProbeSetTrait(id=row[0], name=row[1],
- description=row[2],
- chromosome=row[4],
- MB=row[5],
- symbol=row[3],
- GeneId=row[6])
- trait.dataId = row[7]
- traits.append(trait)
-
- return traits
-
-
-# queryPublishTraits : Cursor -> arrayof Trait
-def queryPublishTraits(cursor, freezeId):
- """
- To locate all published traits
- """
- qry = '''
- SELECT
- Publication.Id,
- Publication.Name,
- Publication.PhenoType,
- Publication.Authors,
- Publication.Title,
- Publication.Year,
- Publication.PubMed_ID,
- PublishXRef.DataId
- FROM
- Publication,
- PublishXRef
- WHERE
- PublishXRef.PublishFreezeId = %s AND
- PublishXRef.PublishId = Publication.Id
- ''' % freezeId
-
- qry = '''
- SELECT
- Publication.Id,
- PublishXRef.Id,
- Phenotype.Pre_publication_description,
- Phenotype.Post_publication_description,
- Publication.Authors,
- Publication.Title,
- Publication.Year,
- Publication.PubMed_ID,
- PublishXRef.DataId
- FROM
- Publication, PublishXRef, Phenotype, PublishFreeze
- WHERE
- PublishFreeze.Id = %s AND
- PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
- PublishXRef.PublicationId = Publication.Id AND
- PublishXRef.PhenotypeId = Phenotype.Id
- ''' % freezeId
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
- for row in rows:
- PhenotypeString = row[3]
- if not row[7] and row[2]:
- PhenotypeString = row[2]
- trait = PublishTrait(id=row[0], name= '%s' %row[1],
- phenotype=PhenotypeString,
- authors=row[4],
- title=row[5],
- year=row[6],
- href=row[7])
- trait.dataId = row[8]
- traits.append(trait)
-
- return traits
-
-# queryGenotypeTraits : Cursor -> arrayof Trait
-def queryGenotypeTraits(cursor, freezeId):
- """
- To locate all traits in the genotype
- """
- qry = '''
- SELECT
- Geno.Id,
- Geno.Name,
- Geno.Chr,
- GenoXRef.DataId,
- Geno.Mb
- FROM
- Geno,
- GenoXRef
- WHERE
- GenoXRef.GenoId = Geno.Id
- AND GenoXRef.GenoFreezeId = %s
- ''' % freezeId
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
-
- for row in rows:
- trait = GenotypeTrait(id=row[0], name=row[1],
- chromosome=row[2], MB=row[4])
- trait.dataId = row[3]
- traits.append(trait)
-
- return traits
-
-# queryProbeSetTraitByName : Cursor -> string -> Trait
-# to find a particular trait given its name
-def queryProbeSetTraitByName(cursor, name):
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- ProbeSet.Chr,
- ProbeSet.Mb,
- ProbeSet.GeneId
- FROM
- ProbeSet
- WHERE
- ProbeSet.Name = "%s"
- ''' % name
- #print qry
- cursor.execute(qry)
- row = cursor.fetchone()
-
- # convert a MySQL NULL value to an empty string
- # for gene symbol
- if row[3] is None:
- sym = ""
- else:
- sym = row[3]
-
- return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
- symbol=sym, chromosome=row[4], MB=row[5],
- GeneId=row[6])
-
-
-# queryTraits : Cursor -> string -> string -> arrayof Traits
-# to find all of the traits whose descriptions match a certain string in a
-# particular database
-def queryTraits(cursor, dbId, queryString):
- # we do this in two steps:
- # first we get the data id for the matching traits
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSetXRef.DataId
- FROM
- ProbeSet,
- ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = %s AND
- ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
- ProbeSet.description LIKE "%%%s%%"
- ''' % (dbId, queryString)
- # print qry
- cursor.execute(qry)
-
- if cursor.rowcount == 0:
- print "No traits found"
- return []
- else:
- print "%s traits found" % (cursor.rowcount)
-
- # maybe fetchall is bad; we will see
- traits = []
- for row in cursor.fetchall():
- myTrait = Trait(row[0], row[1], row[2])
- myTrait.dataId = row[3]
- traits.append(myTrait)
-
- # second we pull all of the strain data for each trait
- print "Retrieving individual trait data..."
- for trait in traits:
- trait.populateStrainData(cursor, trait.dataId)
- print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
-
- print "done"
- return traits
-
-# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
-# to return the short and long name for each ProbeSetFreeze
-# this function is designed specifically for building
-# a database selector
-def queryProbeSetFreezes(cursor):
- cursor.execute("""
- SELECT
- ProbeSetFreeze.Name,
- ProbeSetFreeze.FullName
- FROM
- ProbeSetFreeze
- ORDER BY
- ProbeSetFreeze.Name
- """)
-
- # for now, fetchall returns the data as a list of tuples
- # which is what we want
- return list(cursor.fetchall())
-
-# queryProbeSetFreezeIdName: Cursor -> String -> String, String
-# this function returns the
-# id and the long name of a probesetfreeze given its name
-# once again, it's designed specifically for building
-# the database selector
-def queryProbeSetFreezeIdName(cursor, name):
- qry = ('''
- SELECT
- ProbeSetFreeze.Id,
- ProbeSetFreeze.FullName
- FROM
- ProbeSetFreeze
- WHERE
- ProbeSetFreeze.Name = "%s"
- ''' % name)
- #print qry
- cursor.execute(qry)
-
- row = cursor.fetchone()
- return row
-
-# queryProbeSetFreezeName: Cursor -> String -> String
-# to return the name of a probe set freeze given its id
-def queryProbeSetFreezeName(cursor, id):
- cursor.execute('''
- SELECT
- ProbeSetFreeze.FullName
- FROM
- ProbeSetFreeze
- WHERE
- ProbeSetFreeze.Id = %s
- ''' % id)
-
- row = cursor.fetchone()
- return row[0]
-
-# dbNameToTypeId : Cursor -> String -> (String, String)
-# to convert a database name to a (type, id) pair
-def dbNameToTypeId(cursor, name):
- types = ["ProbeSet", "Geno", "Publish"]
- for type_ in types:
- count = cursor.execute('''
- SELECT
- %sFreeze.Id
- FROM
- %sFreeze
- WHERE
- Name = "%s"
- ''' % (type_, type_, name))
-
- if count != 0:
- id = cursor.fetchone()[0]
- return type_, id
-
- return None, None
-
-# dbTypeIdToName : Cursor -> String -> String -> String
-# to convert a database (type,id) pair into a name
-def dbTypeIdToName(cursor, dbType, dbId):
- cursor.execute('''
- SELECT
- %sFreeze.Name
- FROM
- %sFreeze
- WHERE
- Id = %s
- ''' % (dbType, dbType, dbId))
-
- row = cursor.fetchone()
- return row[0]
-
-#XZ, July 21, 2010: I add this function
-def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
- cursor.execute('''
- SELECT
- SpeciesId
- FROM
- InbredSet, %sFreeze
- WHERE
- %sFreeze.Id = %s
- and InbredSetId = InbredSet.Id
- ''' % (dbType, dbType, dbId))
-
- row = cursor.fetchone()
- return row[0]
-
-
-# queryStrainCount : Cursor -> int
-# return the number of strains in the database
-def queryStrainCount(cursor):
- cursor.execute('''
- SELECT
- Max(Strain.Id)
- FROM
- Strain
- ''')
- return (cursor.fetchone())[0]