about | summary | refs | log | tree | commit | diff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-x wqflask/base/data_set.py 221
-rwxr-xr-x wqflask/base/trait.py (renamed from wqflask/base/webqtlTrait.py) 445
-rw-r--r-- wqflask/wqflask/do_search.py 5
-rw-r--r-- wqflask/wqflask/search_results.py 5
-rwxr-xr-x wqflask/wqflask/show_trait/show_trait.py 295
5 files changed, 638 insertions, 333 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 70b33014..68f5e5ed 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -21,12 +21,16 @@
# This module is used by GeneNetwork project (www.genenetwork.org)
from __future__ import print_function, division
+import os
from flask import Flask, g
from htmlgen import HTMLgen2 as HT
+import reaper
+
import webqtlConfig
+from utility import webqtlUtil
from MySQLdb import escape_string as escape
from pprint import pformat as pf
@@ -57,6 +61,74 @@ def create_dataset(dataset_name):
return dataset_class(dataset_name)
+class DatasetGroup(object):
+    """
+    Each group has multiple datasets; each species has multiple groups.
+
+    For example, Mouse has multiple groups (BXD, BXA, etc), and each group
+    has multiple datasets associated with it.
+
+    """
+    def __init__(self, dataset):
+        """Look up the group that `dataset` belongs to.
+
+        Runs the dataset's group query and stores the first row as
+        self.name and self.group_id.  'BXD300' is folded into 'BXD'.
+
+        """
+        # PhenotypeDataSet keeps this query in `query_for_group`; other
+        # dataset classes may still call it `query`, so accept either.
+        group_query = getattr(dataset, 'query_for_group', None) or dataset.query
+        self.name, self.group_id = g.db.execute(group_query).fetchone()
+        if self.name == 'BXD300':
+            self.name = "BXD"
+
+        # Whether the default genotype should include the parents and F1
+        self.incparentsf1 = False
+
+    def read_genotype_file(self):
+        """Read this group's genotypes from its .geno file instead of the database.
+
+        Sets:
+            self.genotype_1 -- reaper Dataset without parents and F1
+            self.genotype_2 -- Dataset with the parents added when ParInfo
+                               knows them, otherwise the same object as
+                               genotype_1 (never built for intercross)
+            self.genotype   -- the default of the two, picked by
+                               self.incparentsf1
+            self.samplelist, self.f1list, self.parlist
+
+        """
+        self.genotype_1 = reaper.Dataset()
+
+        # reaper barfs on unicode filenames, so here we ensure it's a string
+        full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
+        self.genotype_1.read(full_filename)
+
+        print("Got to after read")
+
+        try:
+            # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py;
+            _f1, _f12, _mat, _pat = webqtlUtil.ParInfo[self.name]
+        except KeyError:
+            # No parent/F1 information recorded for this group
+            _f1 = _f12 = _mat = _pat = None
+
+        self.genotype_2 = self.genotype_1
+        # NOTE(review): confirm that reaper really reports the type as
+        # "group"; this literal may have been caught by the riset -> group
+        # rename, in which case the parents are never added.
+        if self.genotype_1.type == "group" and _mat and _pat:
+            self.genotype_2 = self.genotype_1.add(Mat=_mat, Pat=_pat)  #, F1=_f1)
+
+        # Determine the default genotype object
+        if self.incparentsf1 and self.genotype_1.type != "intercross":
+            self.genotype = self.genotype_2
+        else:
+            self.incparentsf1 = False
+            self.genotype = self.genotype_1
+
+        self.samplelist = list(self.genotype.prgy)
+        self.f1list = [_f1, _f12] if _f1 and _f12 else []
+        self.parlist = [_mat, _pat] if _mat and _pat else []
+
+
class DataSet(object):
"""
DataSet class defines a dataset in webqtl, can be either Microarray,
@@ -70,27 +142,35 @@ class DataSet(object):
self.name = name
self.id = None
self.type = None
- self.group = None
self.setup()
self.check_confidentiality()
self.retrieve_name()
- self.get_group()
+ self.group = DatasetGroup(self) # sets self.group and self.group_id
+
+
+ def get_desc(self):
+ """Return a description to show in place of the trait name; the base class has none.
+
+ TempDataSet overrides this.  GeneralTrait.get_given_name and display_name
+ call it and fall back to the trait name when the result is falsy.
+ """
+ return None
# Delete this eventually
@property
def riset():
Weve_Renamed_This_As_Group
+
+
+ #@property
+ #def group(self):
+ # if not self._group:
+ # self.get_group()
+ #
+ # return self._group
+
- def get_group(self):
- self.group, self.group_id = g.db.execute(self.query).fetchone()
- if self.group == 'BXD300':
- self.group = "BXD"
- #return group
def retrieve_name(self):
@@ -176,7 +256,7 @@ class PhenotypeDataSet(DataSet):
self.type = 'Publish'
- self.query = '''
+ self.query_for_group = '''
SELECT
InbredSet.Name, InbredSet.Id
FROM
@@ -239,7 +319,29 @@ class PhenotypeDataSet(DataSet):
this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
- this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
+ this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb))
+
+    def retrieve_sample_data(self, trait):
+        """Return the stored sample rows for a published phenotype trait.
+
+        trait -- object whose `name` is matched against PublishXRef.Id
+
+        Each row is (Strain.Name, PublishData.value, PublishSE.error,
+        NStrain.count, PublishData.Id), ordered by strain name.
+
+        """
+        # NOTE(review): both values are interpolated into the SQL text
+        # unquoted; confirm trait.name is always a numeric id here.
+        query = """
+            SELECT
+                Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
+            FROM
+                (PublishData, Strain, PublishXRef, PublishFreeze)
+            left join PublishSE on
+                (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+            left join NStrain on
+                (NStrain.DataId = PublishData.Id AND
+                NStrain.StrainId = PublishData.StrainId)
+            WHERE
+                PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
+                PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+            Order BY
+                Strain.Name
+        """ % (trait.name, self.id)  # the trait is the argument; a dataset has no self.trait
+        results = g.db.execute(query).fetchall()
+        return results
+
class GenotypeDataSet(DataSet):
DS_NAME_MAP['Geno'] = 'GenotypeDataSet'
@@ -297,6 +399,26 @@ class GenotypeDataSet(DataSet):
this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
this_trait.location_value = trait_location_value
+
+    def retrieve_sample_data(self, trait):
+        """Return the stored sample rows for a genotype (marker) trait.
+
+        trait -- object whose `name` is matched against Geno.Name
+
+        Each row is (Strain.Name, GenoData.value, GenoSE.error, GenoData.Id),
+        ordered by strain name.
+
+        """
+        # self.group is a DatasetGroup object; the species lookup is given
+        # the group's name, as DoSearch does.
+        species_id = webqtlDatabaseFunction.retrieve_species_id(self.group.name)
+        query = """
+            SELECT
+                Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
+            FROM
+                (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
+            left join GenoSE on
+                (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+            WHERE
+                Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+                GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+                GenoFreeze.Name = '%s' AND
+                GenoXRef.DataId = GenoData.Id AND
+                GenoData.StrainId = Strain.Id
+            Order BY
+                Strain.Name
+        """ % (species_id, escape(trait.name), escape(self.name))
+        results = g.db.execute(query).fetchall()
+        return results
class MrnaAssayDataSet(DataSet):
@@ -476,6 +598,42 @@ class MrnaAssayDataSet(DataSet):
this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
+
+    def get_sequence(self, trait=None):
+        """Return the BLAT sequence stored for `trait`'s probe set in this dataset.
+
+        trait -- object whose `name` is the ProbeSet name to look up.  It is
+                 optional only to keep the old zero-argument signature
+                 callable; the lookup cannot be done without it.
+
+        Returns the BlatSeq value, or None when no matching row exists.
+        Raises ValueError when no trait is given.
+
+        """
+        if trait is None:
+            raise ValueError("get_sequence() needs the trait whose sequence is wanted")
+
+        # self is the dataset, so the freeze name is self.name, not self.dataset.name
+        query = """
+            SELECT
+                ProbeSet.BlatSeq
+            FROM
+                ProbeSet, ProbeSetFreeze, ProbeSetXRef
+            WHERE
+                ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
+                ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId AND
+                ProbeSet.Name = '%s' AND
+                ProbeSetFreeze.Name = '%s'
+        """ % (escape(trait.name), escape(self.name))
+        results = g.db.execute(query).fetchone()
+
+        return results[0] if results else None
+
+ def retrieve_sample_data(self, trait):
+ """Return the stored sample rows for a probe set trait in this dataset.
+
+ trait -- object whose `name` is matched against ProbeSet.Name
+
+ Each row is (Strain.Name, ProbeSetData.value, ProbeSetSE.error,
+ ProbeSetData.Id), ordered by strain name.  Both names are passed through
+ escape() before being interpolated into the query text.
+ """
+ query = """
+ SELECT
+ Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+ FROM
+ (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+ left join ProbeSetSE on
+ (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+ WHERE
+ ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+ ProbeSetFreeze.Name = '%s' AND
+ ProbeSetXRef.DataId = ProbeSetData.Id AND
+ ProbeSetData.StrainId = Strain.Id
+ Order BY
+ Strain.Name
+ """ % (escape(trait.name), escape(self.name))
+ results = g.db.execute(query).fetchall()
+ return results
class TempDataSet(DataSet):
@@ -497,6 +655,51 @@ class TempDataSet(DataSet):
self.id = 1
self.fullname = 'Temporary Storage'
self.shortname = 'Temp'
+
+
+    @staticmethod
+    def handle_pca(desc):
+        """Cut a stored Temp description down to the part worth displaying.
+
+        A description containing 'PCA' is reduced to the text after its last
+        ':'; any other description is reduced to the text before the first
+        'entered'.  When the marker is absent the whole description is kept
+        rather than raising ValueError.  The result is stripped of
+        surrounding whitespace.
+
+        """
+        if 'PCA' in desc:
+            # rpartition yields the whole string as the tail when ':' is absent
+            shortened = desc.rpartition(':')[2]
+        else:
+            # partition yields the whole string as the head when 'entered' is absent
+            shortened = desc.partition('entered')[0]
+        return shortened.strip()
+
+    def get_desc(self):
+        """Return the display description stored for this temporary dataset.
+
+        Looks up Temp.description by self.name and shortens it with
+        handle_pca.  Returns None when no row matches, which callers treat
+        as "no description" in the same way as the base class result.
+
+        """
+        # fetchone() belongs to the result of execute(), not to g.db
+        row = g.db.execute('SELECT description FROM Temp WHERE Name=%s', self.name).fetchone()
+        if row is None:
+            return None
+        return self.handle_pca(row[0])
+
+ def get_group(self):
+ """Fetch (InbredSet.Name, InbredSet.Id) for this Temp entry into self.group and self.group_id.
+
+ NOTE(review): this goes through self.cursor, while the other methods added
+ to this file use g.db -- confirm a cursor is still attached to datasets.
+ NOTE(review): DataSet.__init__ assigns a DatasetGroup to self.group; calling
+ this afterwards would replace that object with the plain group name.
+ NOTE(review): the placeholder is wrapped in double quotes although the
+ name is passed as a separate execute() argument -- verify the quoting.
+ """
+ self.cursor.execute("""
+ SELECT
+ InbredSet.Name, InbredSet.Id
+ FROM
+ InbredSet, Temp
+ WHERE
+ Temp.InbredSetId = InbredSet.Id AND
+ Temp.Name = "%s"
+ """, self.name)
+ self.group, self.group_id = self.cursor.fetchone()
+ #return self.group
+
+    def retrieve_sample_data(self, trait):
+        """Return the stored sample rows for a temporary trait.
+
+        trait -- object whose `name` is matched against Temp.name
+
+        Each row is (Strain.Name, TempData.value, TempData.SE,
+        TempData.NStrain, TempData.Id), ordered by strain name.
+
+        """
+        query = """
+            SELECT
+                Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
+            FROM
+                TempData, Temp, Strain
+            WHERE
+                TempData.StrainId = Strain.Id AND
+                TempData.Id = Temp.DataId AND
+                Temp.name = '%s'
+            Order BY
+                Strain.Name
+        """ % escape(trait.name)
+
+        results = g.db.execute(query).fetchall()
+        # The sibling dataset classes return their rows; GeneralTrait relies on that
+        return results
def geno_mrna_confidentiality(ob):
diff --git a/wqflask/base/webqtlTrait.py b/wqflask/base/trait.py
index 5367b41f..d3753fc1 100755
--- a/wqflask/base/webqtlTrait.py
+++ b/wqflask/base/trait.py
@@ -28,7 +28,7 @@ class GeneralTrait:
self.name = kw.get('name', None) # Trait ID, ProbeSet ID, Published ID, etc.
self.cellid = kw.get('cellid', None)
self.identification = kw.get('identification', 'un-named trait')
- self.group = kw.get('group', None)
+ #self.group = kw.get('group', None)
self.haveinfo = kw.get('haveinfo', False)
self.sequence = kw.get('sequence', None) # Blat sequence, available for ProbeSet
self.data = kw.get('data', {})
@@ -41,28 +41,28 @@ class GeneralTrait:
self.dataset, self.name, self.cellid = name2
#if self.dataset and isinstance(self.dataset, basestring):
- self.dataset = create_dataset(self.dataset.name)
+ self.dataset = create_dataset(self.dataset)
print("self.dataset is:", self.dataset, type(self.dataset))
#if self.dataset:
- self.dataset.get_group()
+ #self.dataset.get_group()
- if self.dataset.type == "Temp":
- self.cursor.execute('''
- SELECT
- InbredSet.Name
- FROM
- InbredSet, Temp
- WHERE
- Temp.InbredSetId = InbredSet.Id AND
- Temp.Name = "%s"
- ''', self.name)
- self.group = self.cursor.fetchone()[0]
- else:
- self.group = self.dataset.get_group()
+ #if self.dataset.type == "Temp":
+ # self.cursor.execute('''
+ # SELECT
+ # InbredSet.Name
+ # FROM
+ # InbredSet, Temp
+ # WHERE
+ # Temp.InbredSetId = InbredSet.Id AND
+ # Temp.Name = "%s"
+ # ''', self.name)
+ # self.group = self.cursor.fetchone()[0]
+ #else:
+ # self.group = self.dataset.get_group()
- print("trinity, self.group is:", self.group)
+ #print("trinity, self.group is:", self.group)
#
# In ProbeSet, there are maybe several annotations match one sequence
@@ -77,76 +77,80 @@ class GeneralTrait:
# It also should be changed in other places where it are used.
#if self.dataset:
- if self.dataset.type == 'ProbeSet':
- print("Doing ProbeSet Query")
- query = '''
- SELECT
- ProbeSet.BlatSeq
- FROM
- ProbeSet, ProbeSetFreeze, ProbeSetXRef
- WHERE
- ProbeSet.Id=ProbeSetXRef.ProbeSetId and
- ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
- ProbeSet.Name = %s and
- ProbeSetFreeze.Name = %s
- ''', (self.name, self.dataset.name)
- print("query is:", query)
- self.sequence = g.db.execute(*query).fetchone()[0]
- #self.sequence = self.cursor.fetchone()[0]
- print("self.sequence is:", self.sequence)
-
-
- def getName(self):
- str = ""
+ #if self.dataset.type == 'ProbeSet':
+ # print("Doing ProbeSet Query")
+ # query = '''
+ # SELECT
+ # ProbeSet.BlatSeq
+ # FROM
+ # ProbeSet, ProbeSetFreeze, ProbeSetXRef
+ # WHERE
+ # ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+ # ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
+ # ProbeSet.Name = %s and
+ # ProbeSetFreeze.Name = %s
+ # ''', (self.name, self.dataset.name)
+ # print("query is:", query)
+ # self.sequence = g.db.execute(*query).fetchone()[0]
+ # #self.sequence = self.cursor.fetchone()[0]
+ # print("self.sequence is:", self.sequence)
+
+
+ def get_name(self):
+ stringy = ""
if self.dataset and self.name:
- str = "%s::%s" % (self.dataset, self.name)
+ stringy = "%s::%s" % (self.dataset, self.name)
if self.cellid:
- str += "::" + self.cellid
+ stringy += "::" + self.cellid
else:
- str = self.description
- return str
-
- #
- # when user enter a trait or GN generate a trait, user want show the name
- # not the name that generated by GN randomly, the two follow function are
- # used to give the real name and the database. displayName() will show the
- # database also, getGivenName() just show the name.
- # For other trait, displayName() as same as getName(), getGivenName() as
- # same as self.name
- #
- # Hongqiang 11/29/07
- #
- def getGivenName(self):
- str = self.name
+ stringy = self.description
+ return stringy
+
+
+ def get_given_name(self):
+ """
+ when user enter a trait or GN generate a trait, user want show the name
+ not the name that generated by GN randomly, the two follow function are
+ used to give the real name and the database. displayName() will show the
+ database also, getGivenName() just show the name.
+ For other trait, displayName() as same as getName(), getGivenName() as
+ same as self.name
+
+ Hongqiang 11/29/07
+
+ """
+ stringy = self.name
if self.dataset and self.name:
- if self.dataset.type=='Temp':
- self.cursor.execute('SELECT description FROM Temp WHERE Name=%s', self.name)
- desc = self.cursor.fetchone()[0]
- if desc.__contains__('PCA'):
- desc = desc[desc.rindex(':')+1:].strip()
- else:
- desc = desc[:desc.index('entered')].strip()
- str = desc
- return str
+ desc = self.dataset.get_desc()
+ if desc:
+ #desc = self.handle_pca(desc)
+ stringy = desc
+ return stringy
+
+
- def displayName(self):
- str = ""
+ def display_name(self):
+ stringy = ""
if self.dataset and self.name:
- if self.dataset.type=='Temp':
- desc = self.description
- if desc.__contains__('PCA'):
- desc = desc[desc.rindex(':')+1:].strip()
- else:
- desc = desc[:desc.index('entered')].strip()
- str = "%s::%s" % (self.dataset, desc)
+ desc = self.dataset.get_desc()
+ #desc = self.handle_pca(desc)
+ if desc:
+ #desc = self.handle_pca(desc)
+ #stringy = desc
+ #if desc.__contains__('PCA'):
+ # desc = desc[desc.rindex(':')+1:].strip()
+ #else:
+ # desc = desc[:desc.index('entered')].strip()
+ #desc = self.handle_pca(desc)
+ stringy = "%s::%s" % (self.dataset, desc)
else:
- str = "%s::%s" % (self.dataset, self.name)
+ stringy = "%s::%s" % (self.dataset, self.name)
if self.cellid:
- str += "::" + self.cellid
+ stringy += "::" + self.cellid
else:
- str = self.description
+ stringy = self.description
- return str
+ return stringy
#def __str__(self):
@@ -155,41 +159,43 @@ class GeneralTrait:
#__str__ = getName
#__repr__ = __str__
- def exportData(self, samplelist, type="val"):
+ def export_data(self, samplelist, the_type="val"):
"""
- export data according to samplelist
- mostly used in calculating correlation
+ export data according to samplelist
+ mostly used in calculating correlation
+
"""
result = []
for sample in samplelist:
if self.data.has_key(sample):
- if type=='val':
+ if the_type=='val':
result.append(self.data[sample].val)
- elif type=='var':
+ elif the_type=='var':
result.append(self.data[sample].var)
- elif type=='N':
+ elif the_type=='N':
result.append(self.data[sample].N)
else:
- raise KeyError, `type`+' type is incorrect.'
+ raise KeyError, `the_type`+' the_type is incorrect.'
else:
result.append(None)
return result
- def exportInformative(self, incVar=0):
+ def export_informative(self, incVar=0):
"""
- export informative sample
- mostly used in qtl regression
+ export informative sample
+ mostly used in qtl regression
+
"""
samples = []
vals = []
- vars = []
+ the_vars = []
for sample, value in self.data.items():
if value.val != None:
if not incVar or value.var != None:
samples.append(sample)
vals.append(value.val)
- vars.append(value.var)
- return samples, vals, vars
+ the_vars.append(value.var)
+ return samples, vals, the_vars
#
@@ -199,136 +205,144 @@ class GeneralTrait:
#
# Hongqiang Li, 3/3/2008
#
- def getSequence(self):
- assert self.cursor
- if self.dataset.type == 'ProbeSet':
- self.cursor.execute('''
- SELECT
- ProbeSet.BlatSeq
- FROM
- ProbeSet, ProbeSetFreeze, ProbeSetXRef
- WHERE
- ProbeSet.Id=ProbeSetXRef.ProbeSetId and
- ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
- ProbeSet.Name = %s
- ProbeSetFreeze.Name = %s
- ''', self.name, self.dataset.name)
- #self.cursor.execute(query)
- results = self.fetchone()
-
- return results[0]
-
+ #def getSequence(self):
+ # assert self.cursor
+ # if self.dataset.type == 'ProbeSet':
+ # self.cursor.execute('''
+ # SELECT
+ # ProbeSet.BlatSeq
+ # FROM
+ # ProbeSet, ProbeSetFreeze, ProbeSetXRef
+ # WHERE
+ # ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+ # ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
+ # ProbeSet.Name = %s
+ # ProbeSetFreeze.Name = %s
+ # ''', self.name, self.dataset.name)
+ # #self.cursor.execute(query)
+ # results = self.fetchone()
+ #
+ # return results[0]
- def retrieveData(self, samplelist=None):
+ def retrieve_sample_data(self, samplelist=None):
if samplelist == None:
samplelist = []
- assert self.dataset and self.cursor
-
- if self.dataset.type == 'Temp':
- query = '''
- SELECT
- Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
- FROM
- TempData, Temp, Strain
- WHERE
- TempData.StrainId = Strain.Id AND
- TempData.Id = Temp.DataId AND
- Temp.name = '%s'
- Order BY
- Strain.Name
- ''' % self.name
- #XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE
- elif self.dataset.type == 'Publish':
- query = '''
- SELECT
- Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
- FROM
- (PublishData, Strain, PublishXRef, PublishFreeze)
- left join PublishSE on
- (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
- left join NStrain on
- (NStrain.DataId = PublishData.Id AND
- NStrain.StrainId = PublishData.StrainId)
- WHERE
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
- PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
- Order BY
- Strain.Name
- ''' % (self.name, self.dataset.id)
+
+ assert self.dataset
+
+ #if self.cellid:
+ # #Probe Data
+ # query = '''
+ # SELECT
+ # Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id
+ # FROM
+ # (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef,
+ # Strain, Probe, ProbeSet)
+ # left join ProbeSE on
+ # (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
+ # WHERE
+ # Probe.Name = '%s' AND ProbeSet.Name = '%s' AND
+ # Probe.ProbeSetId = ProbeSet.Id AND
+ # ProbeXRef.ProbeId = Probe.Id AND
+ # ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND
+ # ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+ # ProbeSetFreeze.Name = '%s' AND
+ # ProbeXRef.DataId = ProbeData.Id AND
+ # ProbeData.StrainId = Strain.Id
+ # Order BY
+ # Strain.Name
+ # ''' % (self.cellid, self.name, self.dataset.name)
+ #
+ #else:
+ results = self.dataset.retrieve_sample_data(self)
+
+ #if self.dataset.type == 'Temp':
+ # query = '''
+ # SELECT
+ # Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id
+ # FROM
+ # TempData, Temp, Strain
+ # WHERE
+ # TempData.StrainId = Strain.Id AND
+ # TempData.Id = Temp.DataId AND
+ # Temp.name = '%s'
+ # Order BY
+ # Strain.Name
+ # ''' % self.name
+ ##XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE
+ #elif self.dataset.type == 'Publish':
+ # query = '''
+ # SELECT
+ # Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id
+ # FROM
+ # (PublishData, Strain, PublishXRef, PublishFreeze)
+ # left join PublishSE on
+ # (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+ # left join NStrain on
+ # (NStrain.DataId = PublishData.Id AND
+ # NStrain.StrainId = PublishData.StrainId)
+ # WHERE
+ # PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+ # PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
+ # PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+ # Order BY
+ # Strain.Name
+ # ''' % (self.name, self.dataset.id)
#XZ, 03/02/2009: Xiaodong changed Data to ProbeData, SE to ProbeSE
- elif self.cellid:
- #Probe Data
- query = '''
- SELECT
- Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id
- FROM
- (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef,
- Strain, Probe, ProbeSet)
- left join ProbeSE on
- (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId)
- WHERE
- Probe.Name = '%s' AND ProbeSet.Name = '%s' AND
- Probe.ProbeSetId = ProbeSet.Id AND
- ProbeXRef.ProbeId = Probe.Id AND
- ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND
- ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
- ProbeSetFreeze.Name = '%s' AND
- ProbeXRef.DataId = ProbeData.Id AND
- ProbeData.StrainId = Strain.Id
- Order BY
- Strain.Name
- ''' % (self.cellid, self.name, self.dataset.name)
+ #elif self.cellid:
+
#XZ, 03/02/2009: Xiaodong added this block for ProbeSetData and ProbeSetSE
- elif self.dataset.type == 'ProbeSet':
- #ProbeSet Data
- query = '''
- SELECT
- Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
- FROM
- (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
- left join ProbeSetSE on
- (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
- WHERE
- ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = '%s' AND
- ProbeSetXRef.DataId = ProbeSetData.Id AND
- ProbeSetData.StrainId = Strain.Id
- Order BY
- Strain.Name
- ''' % (self.name, self.dataset.name)
- #XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE
- else:
- #Geno Data
- #XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search.
- query = '''
- SELECT
- Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
- FROM
- (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
- left join GenoSE on
- (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
- WHERE
- Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
- GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = '%s' AND
- GenoXRef.DataId = GenoData.Id AND
- GenoData.StrainId = Strain.Id
- Order BY
- Strain.Name
- ''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group), self.name, self.dataset.name)
+ #elif self.dataset.type == 'ProbeSet':
+ # #ProbeSet Data
+ # query = '''
+ # SELECT
+ # Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+ # FROM
+ # (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+ # left join ProbeSetSE on
+ # (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+ # WHERE
+ # ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ # ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+ # ProbeSetFreeze.Name = '%s' AND
+ # ProbeSetXRef.DataId = ProbeSetData.Id AND
+ # ProbeSetData.StrainId = Strain.Id
+ # Order BY
+ # Strain.Name
+ # ''' % (self.name, self.dataset.name)
+ ##XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE
+ #else:
+ # #Geno Data
+ # #XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search.
+ # query = '''
+ # SELECT
+ # Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
+ # FROM
+ # (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
+ # left join GenoSE on
+ # (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+ # WHERE
+ # Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+ # GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+ # GenoFreeze.Name = '%s' AND
+ # GenoXRef.DataId = GenoData.Id AND
+ # GenoData.StrainId = Strain.Id
+ # Order BY
+ # Strain.Name
+ # ''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group), self.name, self.dataset.name)
- self.cursor.execute(query)
- results = self.cursor.fetchall()
+ #self.cursor.execute(query)
+ #results = self.cursor.fetchall()
+
+ # Todo: is this necessary? If not remove
self.data.clear()
if results:
- self.mysqlid = results[0][-1]
+ #self.mysqlid = results[0][-1]
#if samplelist:
for item in results:
#name, value, variance, num_cases = item
@@ -351,8 +365,6 @@ class GeneralTrait:
# self.data[item[0]] = webqtlCaseData(val, var, ndata)
# #end for
# #end if
- #else:
- # pass
#def keys(self):
# return self.__dict__.keys()
@@ -399,7 +411,9 @@ class GeneralTrait:
ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
ProbeSetFreeze.Name = '%s' AND
ProbeSet.Name = '%s'
- """ % (display_fields_string, self.dataset.name, self.name)
+ """ % (escape(display_fields_string),
+ escape(self.dataset.name),
+ escape(self.name))
traitInfo = g.db.execute(query).fetchone()
print("traitInfo is: ", pf(traitInfo))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
@@ -415,7 +429,7 @@ class GeneralTrait:
GenoXRef.GenoId = Geno.Id AND
GenoFreeze.Name = '%s' AND
Geno.Name = '%s'
- """ % (display_fields_string, self.dataset.name, self.name)
+ """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name))
traitInfo = g.db.execute(query).fetchone()
print("traitInfo is: ", pf(traitInfo))
else: #Temp type
@@ -440,7 +454,7 @@ class GeneralTrait:
self.confidential = 1
self.homologeneid = None
- if self.dataset.type == 'ProbeSet' and self.group and self.geneid:
+ if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
#XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
#XZ: So I have to test if geneid is number before execute the query.
#XZ: The geneid values in database should be cleaned up.
@@ -451,7 +465,7 @@ class GeneralTrait:
geneidIsNumber = 0
if geneidIsNumber:
- result = g.db.execute("""
+ query = """
SELECT
HomologeneId
FROM
@@ -461,9 +475,8 @@ class GeneralTrait:
InbredSet.Name = '%s' AND
InbredSet.SpeciesId = Species.Id AND
Species.TaxonomyId = Homologene.TaxonomyId
- """, (self.geneid, self.group)).fetchone()
- #self.cursor.execute(query)
- #result = self.cursor.fetchone()
+ """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
+ result = g.db.execute(query).fetchone()
else:
result = None
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 4301fb50..69602748 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -26,10 +26,11 @@ class DoSearch(object):
assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator"
self.search_operator = search_operator
self.dataset = dataset
+ print("self.dataset is boo: ", type(self.dataset), pf(self.dataset))
+ print("self.dataset.group is: ", pf(self.dataset.group))
#Get group information for dataset and the species id
- self.dataset.get_group()
- self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group)
+ self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name)
def execute(self, query):
"""Executes query and returns results"""
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index cd478110..7c50dfeb 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -30,7 +30,7 @@ from base import webqtlConfig
from utility.THCell import THCell
from utility.TDCell import TDCell
from base.data_set import create_dataset
-from base.webqtlTrait import GeneralTrait
+from base.trait import GeneralTrait
from base.templatePage import templatePage
from wqflask import parser
from wqflask import do_search
@@ -99,8 +99,7 @@ class SearchResultPage(templatePage):
"""
self.trait_list = []
- group = self.dataset.group
- species = webqtlDatabaseFunction.retrieve_species(group=group)
+ species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)
# result_set represents the results for each search term; a search of
# "shh grin2b" would have two sets of results, one for each term
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index aef9219f..2bc4fc9c 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -13,7 +13,8 @@ from base import webqtlConfig
from base import webqtlCaseData
from wqflask.show_trait.SampleList import SampleList
from utility import webqtlUtil, Plot, Bunch
-from base.webqtlTrait import GeneralTrait
+from base.trait import GeneralTrait
+from base.data_set import create_dataset
from dbFunction import webqtlDatabaseFunction
from base.templatePage import templatePage
from basicStatistics import BasicStatisticsFunctions
@@ -33,105 +34,111 @@ class ShowTrait(templatePage):
def __init__(self, args):
print("in ShowTrait, args are:", args)
- self.group = args.group
- self.trait_id = trait_id
- self.dataset = dataset
+ #self.group = args.group
+ self.trait_id = args['trait_id']
+ self.dataset = create_dataset(args['dataset'])
+ self.cell_id = None
#assert self.openMysql(), "No database!"
#print("red3 fd.group:", fd.group)
this_trait = self.get_this_trait()
- print("red4 fd.group:", fd.group)
+ #print("red4 fd.group:", fd.group)
##read genotype file
- fd.group = this_trait.group
+ #fd.group = this_trait.group
- print("[red5] fd.group is:", fd.group)
- fd.readGenotype()
+ #print("[red5] fd.group is:", fd.group)
+ self.dataset.group.read_genotype_file()
+ #fd.readGenotype()
- if not fd.genotype:
- fd.readData(incf1=1)
+ if not self.dataset.group.genotype:
+ self.read_data(incf1=1)
- # determine data editing page format
- variance_data_page = 0
- if fd.formID == 'varianceChoice':
- variance_data_page = 1
-
- if variance_data_page:
- fmID='dataEditing'
- else:
- if fd.enablevariance:
- fmID='pre_dataEditing'
- else:
- fmID='dataEditing'
-
- # Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery
- hddn = OrderedDict(
- FormID = fmID,
- group = fd.group,
- submitID = '',
- scale = 'physic',
- additiveCheck = 'ON',
- showSNP = 'ON',
- showGenes = 'ON',
- method = None,
- parentsf14regression = 'OFF',
- stats_method = '1',
- chromosomes = '-1',
- topten = '',
- viewLegend = 'ON',
- intervalAnalystCheck = 'ON',
- valsHidden = 'OFF',
- database = '',
- criteria = None,
- MDPChoice = None,
- bootCheck = None,
- permCheck = None,
- applyVarianceSE = None,
- sampleNames = '_',
- sampleVals = '_',
- sampleVars = '_',
- otherStrainNames = '_',
- otherStrainVals = '_',
- otherStrainVars = '_',
- extra_attributes = '_',
- other_extra_attributes = '_',
- export_data = None
- )
-
- if fd.enablevariance:
- hddn['enablevariance']='ON'
- if fd.incparentsf1:
- hddn['incparentsf1']='ON'
-
- if this_trait:
- hddn['fullname'] = str(this_trait)
- try:
- hddn['normalPlotTitle'] = this_trait.symbol
- hddn['normalPlotTitle'] += ": "
- hddn['normalPlotTitle'] += this_trait.name
- except:
- hddn['normalPlotTitle'] = str(this_trait.name)
- hddn['fromDataEditingPage'] = 1
- if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
- hddn['trait_type'] = this_trait.dataset.type
- if this_trait.cellid:
- hddn['cellid'] = this_trait.cellid
- else:
- self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
- this_trait.mysqlid)
- heritability = self.cursor.fetchone()
- hddn['heritability'] = heritability
-
- hddn['attribute_names'] = ""
-
- hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor,
- groupName=fd.group)
-
- if fd.identification:
- hddn['identification'] = fd.identification
- else:
- hddn['identification'] = "Un-named trait" #If no identification, set identification to un-named
+ ## determine data editing page format
+ #variance_data_page = 0
+ #if fd.formID == 'varianceChoice':
+ # variance_data_page = 1
+ #
+ #if variance_data_page:
+ # fmID='dataEditing'
+ #else:
+ # if fd.enablevariance:
+ # fmID='pre_dataEditing'
+ # else:
+ # fmID='dataEditing'
+
+ # Todo: Add back in the ones we actually need from below, as we discover we need them
+ hddn = OrderedDict()
+
+
+ ## Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery
+ #hddn = OrderedDict(
+ # FormID = fmID,
+ # group = fd.group,
+ # submitID = '',
+ # scale = 'physic',
+ # additiveCheck = 'ON',
+ # showSNP = 'ON',
+ # showGenes = 'ON',
+ # method = None,
+ # parentsf14regression = 'OFF',
+ # stats_method = '1',
+ # chromosomes = '-1',
+ # topten = '',
+ # viewLegend = 'ON',
+ # intervalAnalystCheck = 'ON',
+ # valsHidden = 'OFF',
+ # database = '',
+ # criteria = None,
+ # MDPChoice = None,
+ # bootCheck = None,
+ # permCheck = None,
+ # applyVarianceSE = None,
+ # sampleNames = '_',
+ # sampleVals = '_',
+ # sampleVars = '_',
+ # otherStrainNames = '_',
+ # otherStrainVals = '_',
+ # otherStrainVars = '_',
+ # extra_attributes = '_',
+ # other_extra_attributes = '_',
+ # export_data = None
+ # )
+
+ #if fd.enablevariance:
+ # hddn['enablevariance']='ON'
+ #if fd.incparentsf1:
+ # hddn['incparentsf1']='ON'
+
+ #if this_trait:
+ # hddn['fullname'] = str(this_trait)
+ # try:
+ # hddn['normalPlotTitle'] = this_trait.symbol
+ # hddn['normalPlotTitle'] += ": "
+ # hddn['normalPlotTitle'] += this_trait.name
+ # except:
+ # hddn['normalPlotTitle'] = str(this_trait.name)
+ # hddn['fromDataEditingPage'] = 1
+ # if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
+ # hddn['trait_type'] = this_trait.dataset.type
+ # if this_trait.cellid:
+ # hddn['cellid'] = this_trait.cellid
+ # else:
+ # self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
+ # this_trait.mysqlid)
+ # heritability = self.cursor.fetchone()
+ # hddn['heritability'] = heritability
+ #
+ # hddn['attribute_names'] = ""
+ #
+ #hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor,
+ # groupName=fd.group)
+ #
+ #if fd.identification:
+ # hddn['identification'] = fd.identification
+ #else:
+ # hddn['identification'] = "Un-named trait" #If no identification, set identification to un-named
self.dispTraitInformation(fd, "", hddn, this_trait) #Display trait information + function buttons
@@ -186,27 +193,109 @@ class ShowTrait(templatePage):
#trait_id = self.fd['trait_id']
#cell_id = self.fd.get('CellID')
- this_trait = webqtlTrait(dataset=dataset,
- name=trait_id,
- cellid=cell_id)
+ this_trait = GeneralTrait(dataset=self.dataset.name,
+ name=self.trait_id,
+ cellid=self.cell_id)
##identification, etc.
- self.fd.identification = '%s : %s' % (this_trait.dataset.shortname, trait_id)
+ self.identification = '%s : %s' % (self.dataset.shortname, self.trait_id)
this_trait.returnURL = webqtlConfig.CGIDIR + webqtlConfig.SCRIPTFILE + '?FormID=showDatabase&database=%s\
- &ProbeSetID=%s&group=%s&parentsf1=on' %(dataset, trait_id, self.fd['group'])
+ &ProbeSetID=%s&group=%s&parentsf1=on' %(self.dataset, self.trait_id, self.dataset.group.name)
- if cell_id:
- self.fd.identification = '%s/%s'%(self.fd.identification, cell_id)
- this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, cell_id)
+ if self.cell_id:
+ self.identification = '%s/%s'%(self.identification, self.cell_id)
+ this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, self.cell_id)
- print("yellow1:", self.group)
- this_trait.retrieveInfo()
- print("yellow2:", self.group)
- this_trait.retrieveData()
- print("yellow3:", self.group)
+ print("yellow1:", self.dataset.group)
+ this_trait.retrieve_info()
+ print("yellow2:", self.dataset.group)
+ this_trait.retrieve_sample_data()
+ print("yellow3:", self.dataset.group)
return this_trait
+    def read_data(self, samplelist=None, incf1=None):
+        '''Read user input data or trait/analysis form data into self.allTraitData.
+
+        samplelist -- sample names to read; when empty or omitted it falls
+            back to self.samplelist, prefixed with self.f1list if incf1 is
+            truthy.
+        incf1 -- truthy to put self.f1list in front of the fallback sample
+            list; None is treated as an empty list.
+
+        Values come from self.traitfile, else self.traitpaste, else the
+        attribute on self named after each sample.  Variances come from
+        self.variancefile, else self.variancepaste, else
+        self.FormVarianceAsFloat.  Sample counts come from self.Nfile, else
+        self.FormNAsFloat.  Each list is padded with None / truncated to the
+        length of samplelist.  Samples whose value is None are left out of
+        self.allTraitData.
+
+        '''
+
+        if incf1 is None:
+            incf1 = []
+
+        # NOTE(review): self.genotype / self.readGenotype are not defined in
+        # the visible part of this class -- confirm they exist here.
+        if not self.genotype:
+            self.readGenotype()
+        if not samplelist:
+            if incf1:
+                samplelist = self.f1list + self.samplelist
+            else:
+                samplelist = self.samplelist
+
+        traitfiledata = getattr(self, "traitfile", None)
+        traitpastedata = getattr(self, "traitpaste", None)
+        variancefiledata = getattr(self, "variancefile", None)
+        variancepastedata = getattr(self, "variancepaste", None)
+        Nfiledata = getattr(self, "Nfile", None)
+
+        #### Todo: Rewrite below when we get to someone submitting their own trait #####
+
+        def to_float(item):
+            # A missing value arrives as None, and float(None) raises
+            # TypeError rather than ValueError, so both are treated as "no value".
+            try:
+                return float(item)
+            except (TypeError, ValueError):
+                return None
+
+        def fit_to_samples(items):
+            # Pad with None or truncate so items lines up with samplelist.
+            # list() keeps this working where map() would return an iterator.
+            items = list(items)
+            shortfall = len(samplelist) - len(items)
+            if shortfall > 0:
+                items += [None] * shortfall
+            return items[:len(samplelist)]
+
+        print("bottle samplelist is:", samplelist)
+        trait_text = traitfiledata or traitpastedata
+        if trait_text:
+            values = [webqtlUtil.StringAsFloat(token) for token in trait_text.split()]
+        else:
+            print("mapping formdataasfloat")
+            # A sample without a matching attribute yields None instead of
+            # raising AttributeError.
+            values = [to_float(getattr(self, key, None)) for key in samplelist]
+        print("rocket values is:", values)
+
+        values = fit_to_samples(values)
+        print("now values is:", values)
+
+        variance_text = variancefiledata or variancepastedata
+        if variance_text:
+            variances = [webqtlUtil.StringAsFloat(token) for token in variance_text.split()]
+        else:
+            variances = [self.FormVarianceAsFloat(key) for key in samplelist]
+        variances = fit_to_samples(variances)
+
+        if Nfiledata:
+            # str.split() instead of string.split(): same result, and no
+            # dependency on the string module.
+            nsamples = [webqtlUtil.IntAsFloat(token) for token in Nfiledata.split()]
+        else:
+            nsamples = [self.FormNAsFloat(key) for key in samplelist]
+        nsamples = fit_to_samples(nsamples)
+
+        ##values, variances, nsamples is obsolete
+        self.allTraitData = {}
+        for _sample, value, variance, nsample in zip(samplelist, values, variances, nsamples):
+            if value is not None:
+                self.allTraitData[_sample] = webqtlCaseData(
+                    _sample, value, variance, nsample)
+        print("allTraitData is:", pf(self.allTraitData))
+
+
def dispTraitInformation(self, fd, title1Body, hddn, this_trait):
_Species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, group=fd.group)