diff options
-rw-r--r-- | misc/notes.txt | 10 | ||||
-rw-r--r-- | misc/todo.txt | 4 | ||||
-rwxr-xr-x | wqflask/base/data_set.py | 221 | ||||
-rwxr-xr-x | wqflask/base/trait.py (renamed from wqflask/base/webqtlTrait.py) | 445 | ||||
-rw-r--r-- | wqflask/wqflask/do_search.py | 5 | ||||
-rw-r--r-- | wqflask/wqflask/search_results.py | 5 | ||||
-rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 295 |
7 files changed, 650 insertions, 335 deletions
diff --git a/misc/notes.txt b/misc/notes.txt index 59ab79cb..b0c0762c 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -14,6 +14,9 @@ export TERM=screen To search for commands in history if necessary: history | grep "(whatever is being searched for)" +Run web server: +/usr/local/nginx/sbin/nginx + Run server: python runserver.py @@ -63,11 +66,16 @@ Classes should always inherit "object" htop: Gives information on processes, cpu/memory load, etc dstat: Also gives various system information, resource usage, etc df: Reports file system disk space usage - +d =========================================== tidyp - Improves/beautifies html code tidyp -m -i -w 100 index_page.html +=========================================== + +ps -ax - View processes + +kill (process #) diff --git a/misc/todo.txt b/misc/todo.txt index 609e053f..60655a71 100644 --- a/misc/todo.txt +++ b/misc/todo.txt @@ -1 +1,3 @@ -- Read about grep/locate/find
\ No newline at end of file +- Check about using trait id instead of trait name in queries in data_set.py + +- Ask Rob about Probe/cellid traits
\ No newline at end of file diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 70b33014..68f5e5ed 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -21,12 +21,16 @@ # This module is used by GeneNetwork project (www.genenetwork.org) from __future__ import print_function, division +import os from flask import Flask, g from htmlgen import HTMLgen2 as HT +import reaper + import webqtlConfig +from utility import webqtlUtil from MySQLdb import escape_string as escape from pprint import pformat as pf @@ -57,6 +61,74 @@ def create_dataset(dataset_name): return dataset_class(dataset_name) +class DatasetGroup(object): + """ + Each group has multiple datasets; each species has multiple groups. + + For example, Mouse has multiple groups (BXD, BXA, etc), and each group + has multiple datasets associated with it. + + """ + def __init__(self, dataset): + """This sets self.group and self.group_id""" + self.name, self.group_id = g.db.execute(dataset.query).fetchone() + if self.name == 'BXD300': + self.name = "BXD" + + self.incparentsf1 = False + + + #def read_genotype(self): + # self.read_genotype_file() + # + # if not self.genotype: # Didn'd succeed, so we try method 2 + # self.read_genotype_data() + + def read_genotype_file(self): + '''read genotype from .geno file instead of database''' + #if self.group == 'BXD300': + # self.group = 'BXD' + # + #assert self.group, "self.group needs to be set" + + #genotype_1 is Dataset Object without parents and f1 + #genotype_2 is Dataset Object with parents and f1 (not for intercross) + + self.genotype_1 = reaper.Dataset() + + # reaper barfs on unicode filenames, so here we ensure it's a string + full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) + self.genotype_1.read(full_filename) + + print("Got to after read") + + try: + # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py; + _f1, _f12, _mat, _pat = webqtlUtil.ParInfo[self.name] + except KeyError: + _f1 = _f12 = _mat = _pat = None + + self.genotype_2 = self.genotype_1 + if self.genotype_1.type == "group" and _mat and _pat: + self.genotype_2 = self.genotype_1.add(Mat=_mat, Pat=_pat) #, F1=_f1) + + #determine default genotype object + if self.incparentsf1 and self.genotype_1.type != "intercross": + self.genotype = self.genotype_2 + else: + self.incparentsf1 = 0 + self.genotype = self.genotype_1 + + self.samplelist = list(self.genotype.prgy) + self.f1list = [] + self.parlist = [] + + if _f1 and _f12: + self.f1list = [_f1, _f12] + if _mat and _pat: + self.parlist = [_mat, _pat] + + class DataSet(object): """ DataSet class defines a dataset in webqtl, can be either Microarray, @@ -70,27 +142,35 @@ class DataSet(object): self.name = name self.id = None self.type = None - self.group = None self.setup() self.check_confidentiality() self.retrieve_name() - self.get_group() + self.group = DatasetGroup(self) # sets self.group and self.group_id + + + def get_desc(self): + """Gets overridden later, at least for Temp...used by trait's get_given_name""" + return None # Delete this eventually @property def riset(): Weve_Renamed_This_As_Group + + + #@property + #def group(self): + # if not self._group: + # self.get_group() + # + # return self._group + - def get_group(self): - self.group, self.group_id = g.db.execute(self.query).fetchone() - if self.group == 'BXD300': - self.group = "BXD" - #return group def retrieve_name(self): @@ -176,7 +256,7 @@ class PhenotypeDataSet(DataSet): self.type = 'Publish' - self.query = ''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id FROM @@ -239,7 +319,29 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) + this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb)) + + def retrieve_sample_data(self, trait): + query = """ + SELECT + Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id + FROM + (PublishData, Strain, PublishXRef, PublishFreeze) + left join PublishSE on + (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) + left join NStrain on + (NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId) + WHERE + PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND + PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id + Order BY + Strain.Name + """ % (self.trait.name, self.id) + results = g.db.execute(query).fetchall() + return results + class GenotypeDataSet(DataSet): DS_NAME_MAP['Geno'] = 'GenotypeDataSet' @@ -297,6 +399,26 @@ class GenotypeDataSet(DataSet): this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) ) this_trait.location_value = trait_location_value + + def retrieve_sample_data(self, trait): + query = """ + SELECT + Strain.Name, GenoData.value, GenoSE.error, GenoData.Id + FROM + (GenoData, GenoFreeze, Strain, Geno, GenoXRef) + left join GenoSE on + (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + WHERE + Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.Name = '%s' AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id + Order BY + Strain.Name + """ % (webqtlDatabaseFunction.retrieve_species_id(self.group), trait.name, self.name) + results = g.db.execute(query).fetchall() + return results class MrnaAssayDataSet(DataSet): @@ -476,6 +598,42 @@ class MrnaAssayDataSet(DataSet): this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = LRS_score_value = this_trait.lrs this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) + + def get_sequence(self): + query = """ + SELECT + ProbeSet.BlatSeq + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSet.Id=ProbeSetXRef.ProbeSetId and + ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and + ProbeSet.Name = %s + ProbeSetFreeze.Name = %s + """ % (escape(self.name), escape(self.dataset.name)) + results = g.db.execute(query).fetchone() + + return results[0] + + def retrieve_sample_data(self, trait): + query = """ + SELECT + Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id + FROM + (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + left join ProbeSetSE on + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.Name = '%s' AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id + Order BY + Strain.Name + """ % (escape(trait.name), escape(self.name)) + results = g.db.execute(query).fetchall() + return results class TempDataSet(DataSet): @@ -497,6 +655,51 @@ class TempDataSet(DataSet): self.id = 1 self.fullname = 'Temporary Storage' self.shortname = 'Temp' + + + @staticmethod + def handle_pca(desc): + if 'PCA' in desc: + # Todo: Modernize below lines + desc = desc[desc.rindex(':')+1:].strip() + else: + desc = desc[:desc.index('entered')].strip() + return desc + + def get_desc(self): + g.db.execute('SELECT description FROM Temp WHERE Name=%s', self.name) + desc = g.db.fetchone()[0] + desc = self.handle_pca(desc) + return desc + + def get_group(self): + self.cursor.execute(""" + SELECT + InbredSet.Name, InbredSet.Id + FROM + InbredSet, Temp + WHERE + Temp.InbredSetId = InbredSet.Id AND + Temp.Name = "%s" + """, self.name) + self.group, self.group_id = self.cursor.fetchone() + #return self.group + + def retrieve_sample_data(self, trait): + query = """ + SELECT + Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id + FROM + TempData, Temp, Strain + WHERE + TempData.StrainId = Strain.Id AND + TempData.Id = Temp.DataId AND + Temp.name = '%s' + Order BY + Strain.Name + """ % escape(trait.name) + + results = g.db.execute(query).fetchall() def geno_mrna_confidentiality(ob): diff --git a/wqflask/base/webqtlTrait.py b/wqflask/base/trait.py index 5367b41f..d3753fc1 100755 --- a/wqflask/base/webqtlTrait.py +++ b/wqflask/base/trait.py @@ -28,7 +28,7 @@ class GeneralTrait: self.name = kw.get('name', None) # Trait ID, ProbeSet ID, Published ID, etc. self.cellid = kw.get('cellid', None) self.identification = kw.get('identification', 'un-named trait') - self.group = kw.get('group', None) + #self.group = kw.get('group', None) self.haveinfo = kw.get('haveinfo', False) self.sequence = kw.get('sequence', None) # Blat sequence, available for ProbeSet self.data = kw.get('data', {}) @@ -41,28 +41,28 @@ class GeneralTrait: self.dataset, self.name, self.cellid = name2 #if self.dataset and isinstance(self.dataset, basestring): - self.dataset = create_dataset(self.dataset.name) + self.dataset = create_dataset(self.dataset) print("self.dataset is:", self.dataset, type(self.dataset)) #if self.dataset: - self.dataset.get_group() + #self.dataset.get_group() - if self.dataset.type == "Temp": - self.cursor.execute(''' - SELECT - InbredSet.Name - FROM - InbredSet, Temp - WHERE - Temp.InbredSetId = InbredSet.Id AND - Temp.Name = "%s" - ''', self.name) - self.group = self.cursor.fetchone()[0] - else: - self.group = self.dataset.get_group() + #if self.dataset.type == "Temp": + # self.cursor.execute(''' + # SELECT + # InbredSet.Name + # FROM + # InbredSet, Temp + # WHERE + # Temp.InbredSetId = InbredSet.Id AND + # Temp.Name = "%s" + # ''', self.name) + # self.group = self.cursor.fetchone()[0] + #else: + # self.group = self.dataset.get_group() - print("trinity, self.group is:", self.group) + #print("trinity, self.group is:", self.group) # # In ProbeSet, there are maybe several annotations match one sequence @@ -77,76 +77,80 @@ class GeneralTrait: # It also should be changed in other places where it are used. #if self.dataset: - if self.dataset.type == 'ProbeSet': - print("Doing ProbeSet Query") - query = ''' - SELECT - ProbeSet.BlatSeq - FROM - ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSet.Id=ProbeSetXRef.ProbeSetId and - ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and - ProbeSet.Name = %s and - ProbeSetFreeze.Name = %s - ''', (self.name, self.dataset.name) - print("query is:", query) - self.sequence = g.db.execute(*query).fetchone()[0] - #self.sequence = self.cursor.fetchone()[0] - print("self.sequence is:", self.sequence) - - - def getName(self): - str = "" + #if self.dataset.type == 'ProbeSet': + # print("Doing ProbeSet Query") + # query = ''' + # SELECT + # ProbeSet.BlatSeq + # FROM + # ProbeSet, ProbeSetFreeze, ProbeSetXRef + # WHERE + # ProbeSet.Id=ProbeSetXRef.ProbeSetId and + # ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and + # ProbeSet.Name = %s and + # ProbeSetFreeze.Name = %s + # ''', (self.name, self.dataset.name) + # print("query is:", query) + # self.sequence = g.db.execute(*query).fetchone()[0] + # #self.sequence = self.cursor.fetchone()[0] + # print("self.sequence is:", self.sequence) + + + def get_name(self): + stringy = "" if self.dataset and self.name: - str = "%s::%s" % (self.dataset, self.name) + stringy = "%s::%s" % (self.dataset, self.name) if self.cellid: - str += "::" + self.cellid + stringy += "::" + self.cellid else: - str = self.description - return str - - # - # when user enter a trait or GN generate a trait, user want show the name - # not the name that generated by GN randomly, the two follow function are - # used to give the real name and the database. displayName() will show the - # database also, getGivenName() just show the name. - # For other trait, displayName() as same as getName(), getGivenName() as - # same as self.name - # - # Hongqiang 11/29/07 - # - def getGivenName(self): - str = self.name + stringy = self.description + return stringy + + + def get_given_name(self): + """ + when user enter a trait or GN generate a trait, user want show the name + not the name that generated by GN randomly, the two follow function are + used to give the real name and the database. displayName() will show the + database also, getGivenName() just show the name. + For other trait, displayName() as same as getName(), getGivenName() as + same as self.name + + Hongqiang 11/29/07 + + """ + stringy = self.name if self.dataset and self.name: - if self.dataset.type=='Temp': - self.cursor.execute('SELECT description FROM Temp WHERE Name=%s', self.name) - desc = self.cursor.fetchone()[0] - if desc.__contains__('PCA'): - desc = desc[desc.rindex(':')+1:].strip() - else: - desc = desc[:desc.index('entered')].strip() - str = desc - return str + desc = self.dataset.get_desc() + if desc: + #desc = self.handle_pca(desc) + stringy = desc + return stringy + + - def displayName(self): - str = "" + def display_name(self): + stringy = "" if self.dataset and self.name: - if self.dataset.type=='Temp': - desc = self.description - if desc.__contains__('PCA'): - desc = desc[desc.rindex(':')+1:].strip() - else: - desc = desc[:desc.index('entered')].strip() - str = "%s::%s" % (self.dataset, desc) + desc = self.dataset.get_desc() + #desc = self.handle_pca(desc) + if desc: + #desc = self.handle_pca(desc) + #stringy = desc + #if desc.__contains__('PCA'): + # desc = desc[desc.rindex(':')+1:].strip() + #else: + # desc = desc[:desc.index('entered')].strip() + #desc = self.handle_pca(desc) + stringy = "%s::%s" % (self.dataset, desc) else: - str = "%s::%s" % (self.dataset, self.name) + stringy = "%s::%s" % (self.dataset, self.name) if self.cellid: - str += "::" + self.cellid + stringy += "::" + self.cellid else: - str = self.description + stringy = self.description - return str + return stringy #def __str__(self): @@ -155,41 +159,43 @@ class GeneralTrait: #__str__ = getName #__repr__ = __str__ - def exportData(self, samplelist, type="val"): + def export_data(self, samplelist, the_type="val"): """ - export data according to samplelist - mostly used in calculating correlation + export data according to samplelist + mostly used in calculating correlation + """ result = [] for sample in samplelist: if self.data.has_key(sample): - if type=='val': + if the_type=='val': result.append(self.data[sample].val) - elif type=='var': + elif the_type=='var': result.append(self.data[sample].var) - elif type=='N': + elif the_type=='N': result.append(self.data[sample].N) else: - raise KeyError, `type`+' type is incorrect.' + raise KeyError, `the_type`+' the_type is incorrect.' else: result.append(None) return result - def exportInformative(self, incVar=0): + def export_informative(self, incVar=0): """ - export informative sample - mostly used in qtl regression + export informative sample + mostly used in qtl regression + """ samples = [] vals = [] - vars = [] + the_vars = [] for sample, value in self.data.items(): if value.val != None: if not incVar or value.var != None: samples.append(sample) vals.append(value.val) - vars.append(value.var) - return samples, vals, vars + the_vars.append(value.var) + return samples, vals, the_vars # @@ -199,136 +205,144 @@ class GeneralTrait: # # Hongqiang Li, 3/3/2008 # - def getSequence(self): - assert self.cursor - if self.dataset.type == 'ProbeSet': - self.cursor.execute(''' - SELECT - ProbeSet.BlatSeq - FROM - ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSet.Id=ProbeSetXRef.ProbeSetId and - ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and - ProbeSet.Name = %s - ProbeSetFreeze.Name = %s - ''', self.name, self.dataset.name) - #self.cursor.execute(query) - results = self.fetchone() - - return results[0] - + #def getSequence(self): + # assert self.cursor + # if self.dataset.type == 'ProbeSet': + # self.cursor.execute(''' + # SELECT + # ProbeSet.BlatSeq + # FROM + # ProbeSet, ProbeSetFreeze, ProbeSetXRef + # WHERE + # ProbeSet.Id=ProbeSetXRef.ProbeSetId and + # ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and + # ProbeSet.Name = %s + # ProbeSetFreeze.Name = %s + # ''', self.name, self.dataset.name) + # #self.cursor.execute(query) + # results = self.fetchone() + # + # return results[0] - def retrieveData(self, samplelist=None): + def retrieve_sample_data(self, samplelist=None): if samplelist == None: samplelist = [] - assert self.dataset and self.cursor - - if self.dataset.type == 'Temp': - query = ''' - SELECT - Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id - FROM - TempData, Temp, Strain - WHERE - TempData.StrainId = Strain.Id AND - TempData.Id = Temp.DataId AND - Temp.name = '%s' - Order BY - Strain.Name - ''' % self.name - #XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE - elif self.dataset.type == 'Publish': - query = ''' - SELECT - Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id - FROM - (PublishData, Strain, PublishXRef, PublishFreeze) - left join PublishSE on - (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) - left join NStrain on - (NStrain.DataId = PublishData.Id AND - NStrain.StrainId = PublishData.StrainId) - WHERE - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND - PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id - Order BY - Strain.Name - ''' % (self.name, self.dataset.id) + + assert self.dataset + + #if self.cellid: + # #Probe Data + # query = ''' + # SELECT + # Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id + # FROM + # (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef, + # Strain, Probe, ProbeSet) + # left join ProbeSE on + # (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId) + # WHERE + # Probe.Name = '%s' AND ProbeSet.Name = '%s' AND + # Probe.ProbeSetId = ProbeSet.Id AND + # ProbeXRef.ProbeId = Probe.Id AND + # ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND + # ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + # ProbeSetFreeze.Name = '%s' AND + # ProbeXRef.DataId = ProbeData.Id AND + # ProbeData.StrainId = Strain.Id + # Order BY + # Strain.Name + # ''' % (self.cellid, self.name, self.dataset.name) + # + #else: + results = self.dataset.retrieve_sample_data(self) + + #if self.dataset.type == 'Temp': + # query = ''' + # SELECT + # Strain.Name, TempData.value, TempData.SE, TempData.NStrain, TempData.Id + # FROM + # TempData, Temp, Strain + # WHERE + # TempData.StrainId = Strain.Id AND + # TempData.Id = Temp.DataId AND + # Temp.name = '%s' + # Order BY + # Strain.Name + # ''' % self.name + ##XZ, 03/02/2009: Xiaodong changed Data to PublishData, SE to PublishSE + #elif self.dataset.type == 'Publish': + # query = ''' + # SELECT + # Strain.Name, PublishData.value, PublishSE.error, NStrain.count, PublishData.Id + # FROM + # (PublishData, Strain, PublishXRef, PublishFreeze) + # left join PublishSE on + # (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) + # left join NStrain on + # (NStrain.DataId = PublishData.Id AND + # NStrain.StrainId = PublishData.StrainId) + # WHERE + # PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + # PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND + # PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id + # Order BY + # Strain.Name + # ''' % (self.name, self.dataset.id) #XZ, 03/02/2009: Xiaodong changed Data to ProbeData, SE to ProbeSE - elif self.cellid: - #Probe Data - query = ''' - SELECT - Strain.Name, ProbeData.value, ProbeSE.error, ProbeData.Id - FROM - (ProbeData, ProbeFreeze, ProbeSetFreeze, ProbeXRef, - Strain, Probe, ProbeSet) - left join ProbeSE on - (ProbeSE.DataId = ProbeData.Id AND ProbeSE.StrainId = ProbeData.StrainId) - WHERE - Probe.Name = '%s' AND ProbeSet.Name = '%s' AND - Probe.ProbeSetId = ProbeSet.Id AND - ProbeXRef.ProbeId = Probe.Id AND - ProbeXRef.ProbeFreezeId = ProbeFreeze.Id AND - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeXRef.DataId = ProbeData.Id AND - ProbeData.StrainId = Strain.Id - Order BY - Strain.Name - ''' % (self.cellid, self.name, self.dataset.name) + #elif self.cellid: + #XZ, 03/02/2009: Xiaodong added this block for ProbeSetData and ProbeSetSE - elif self.dataset.type == 'ProbeSet': - #ProbeSet Data - query = ''' - SELECT - Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id - FROM - (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) - left join ProbeSetSE on - (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) - WHERE - ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSetXRef.DataId = ProbeSetData.Id AND - ProbeSetData.StrainId = Strain.Id - Order BY - Strain.Name - ''' % (self.name, self.dataset.name) - #XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE - else: - #Geno Data - #XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search. - query = ''' - SELECT - Strain.Name, GenoData.value, GenoSE.error, GenoData.Id - FROM - (GenoData, GenoFreeze, Strain, Geno, GenoXRef) - left join GenoSE on - (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) - WHERE - Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND - GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoFreeze.Name = '%s' AND - GenoXRef.DataId = GenoData.Id AND - GenoData.StrainId = Strain.Id - Order BY - Strain.Name - ''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group), self.name, self.dataset.name) + #elif self.dataset.type == 'ProbeSet': + # #ProbeSet Data + # query = ''' + # SELECT + # Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id + # FROM + # (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + # left join ProbeSetSE on + # (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + # WHERE + # ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + # ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + # ProbeSetFreeze.Name = '%s' AND + # ProbeSetXRef.DataId = ProbeSetData.Id AND + # ProbeSetData.StrainId = Strain.Id + # Order BY + # Strain.Name + # ''' % (self.name, self.dataset.name) + ##XZ, 03/02/2009: Xiaodong changeded Data to GenoData, SE to GenoSE + #else: + # #Geno Data + # #XZ: The SpeciesId is not necessary, but it's nice to keep it to speed up database search. + # query = ''' + # SELECT + # Strain.Name, GenoData.value, GenoSE.error, GenoData.Id + # FROM + # (GenoData, GenoFreeze, Strain, Geno, GenoXRef) + # left join GenoSE on + # (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + # WHERE + # Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND + # GenoXRef.GenoFreezeId = GenoFreeze.Id AND + # GenoFreeze.Name = '%s' AND + # GenoXRef.DataId = GenoData.Id AND + # GenoData.StrainId = Strain.Id + # Order BY + # Strain.Name + # ''' % (webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group), self.name, self.dataset.name) - self.cursor.execute(query) - results = self.cursor.fetchall() + #self.cursor.execute(query) + #results = self.cursor.fetchall() + + # Todo: is this necessary? If not remove self.data.clear() if results: - self.mysqlid = results[0][-1] + #self.mysqlid = results[0][-1] #if samplelist: for item in results: #name, value, variance, num_cases = item @@ -351,8 +365,6 @@ class GeneralTrait: # self.data[item[0]] = webqtlCaseData(val, var, ndata) # #end for # #end if - #else: - # pass #def keys(self): # return self.__dict__.keys() @@ -399,7 +411,9 @@ class GeneralTrait: ProbeSetXRef.ProbeSetId = ProbeSet.Id AND ProbeSetFreeze.Name = '%s' AND ProbeSet.Name = '%s' - """ % (display_fields_string, self.dataset.name, self.name) + """ % (escape(display_fields_string), + escape(self.dataset.name), + escape(self.name)) traitInfo = g.db.execute(query).fetchone() print("traitInfo is: ", pf(traitInfo)) #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name @@ -415,7 +429,7 @@ class GeneralTrait: GenoXRef.GenoId = Geno.Id AND GenoFreeze.Name = '%s' AND Geno.Name = '%s' - """ % (display_fields_string, self.dataset.name, self.name) + """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name)) traitInfo = g.db.execute(query).fetchone() print("traitInfo is: ", pf(traitInfo)) else: #Temp type @@ -440,7 +454,7 @@ class GeneralTrait: self.confidential = 1 self.homologeneid = None - if self.dataset.type == 'ProbeSet' and self.group and self.geneid: + if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. #XZ: The geneid values in database should be cleaned up. @@ -451,7 +465,7 @@ class GeneralTrait: geneidIsNumber = 0 if geneidIsNumber: - result = g.db.execute(""" + query = """ SELECT HomologeneId FROM @@ -461,9 +475,8 @@ class GeneralTrait: InbredSet.Name = '%s' AND InbredSet.SpeciesId = Species.Id AND Species.TaxonomyId = Homologene.TaxonomyId - """, (self.geneid, self.group)).fetchone() - #self.cursor.execute(query) - #result = self.cursor.fetchone() + """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) + result = g.db.execute(query).fetchone() else: result = None diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 4301fb50..69602748 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -26,10 +26,11 @@ class DoSearch(object): assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset + print("self.dataset is boo: ", type(self.dataset), pf(self.dataset)) + print("self.dataset.group is: ", pf(self.dataset.group)) #Get group information for dataset and the species id - self.dataset.get_group() - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group) + self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) def execute(self, query): """Executes query and returns results""" diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index cd478110..7c50dfeb 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -30,7 +30,7 @@ from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell from base.data_set import create_dataset -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from base.templatePage import templatePage from wqflask import parser from wqflask import do_search @@ -99,8 +99,7 @@ class SearchResultPage(templatePage): """ self.trait_list = [] - group = self.dataset.group - species = webqtlDatabaseFunction.retrieve_species(group=group) + species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index aef9219f..2bc4fc9c 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -13,7 +13,8 @@ from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList from utility import webqtlUtil, Plot, Bunch -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait +from base.data_set import create_dataset from dbFunction import webqtlDatabaseFunction from base.templatePage import templatePage from basicStatistics import BasicStatisticsFunctions @@ -33,105 +34,111 @@ class ShowTrait(templatePage): def __init__(self, args): print("in ShowTrait, args are:", args) - self.group = args.group - self.trait_id = trait_id - self.dataset = dataset + #self.group = args.group + self.trait_id = args['trait_id'] + self.dataset = create_dataset(args['dataset']) + self.cell_id = None #assert self.openMysql(), "No database!" #print("red3 fd.group:", fd.group) this_trait = self.get_this_trait() - print("red4 fd.group:", fd.group) + #print("red4 fd.group:", fd.group) ##read genotype file - fd.group = this_trait.group + #fd.group = this_trait.group - print("[red5] fd.group is:", fd.group) - fd.readGenotype() + #print("[red5] fd.group is:", fd.group) + self.dataset.group.read_genotype_file() + #fd.readGenotype() - if not fd.genotype: - fd.readData(incf1=1) + if not self.dataset.group.genotype: + self.read_data(incf1=1) - # determine data editing page format - variance_data_page = 0 - if fd.formID == 'varianceChoice': - variance_data_page = 1 - - if variance_data_page: - fmID='dataEditing' - else: - if fd.enablevariance: - fmID='pre_dataEditing' - else: - fmID='dataEditing' - - # Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery - hddn = OrderedDict( - FormID = fmID, - group = fd.group, - submitID = '', - scale = 'physic', - additiveCheck = 'ON', - showSNP = 'ON', - showGenes = 'ON', - method = None, - parentsf14regression = 'OFF', - stats_method = '1', - chromosomes = '-1', - topten = '', - viewLegend = 'ON', - intervalAnalystCheck = 'ON', - valsHidden = 'OFF', - database = '', - criteria = None, - MDPChoice = None, - bootCheck = None, - permCheck = None, - applyVarianceSE = None, - sampleNames = '_', - sampleVals = '_', - sampleVars = '_', - otherStrainNames = '_', - otherStrainVals = '_', - otherStrainVars = '_', - extra_attributes = '_', - other_extra_attributes = '_', - export_data = None - ) - - if fd.enablevariance: - hddn['enablevariance']='ON' - if fd.incparentsf1: - hddn['incparentsf1']='ON' - - if this_trait: - hddn['fullname'] = str(this_trait) - try: - hddn['normalPlotTitle'] = this_trait.symbol - hddn['normalPlotTitle'] += ": " - hddn['normalPlotTitle'] += this_trait.name - except: - hddn['normalPlotTitle'] = str(this_trait.name) - hddn['fromDataEditingPage'] = 1 - if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet': - hddn['trait_type'] = this_trait.dataset.type - if this_trait.cellid: - hddn['cellid'] = this_trait.cellid - else: - self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" % - this_trait.mysqlid) - heritability = self.cursor.fetchone() - hddn['heritability'] = heritability - - hddn['attribute_names'] = "" - - hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor, - groupName=fd.group) - - if fd.identification: - hddn['identification'] = fd.identification - else: - hddn['identification'] = "Un-named trait" #If no identification, set identification to un-named + ## determine data editing page format + #variance_data_page = 0 + #if fd.formID == 'varianceChoice': + # variance_data_page = 1 + # + #if variance_data_page: + # fmID='dataEditing' + #else: + # if fd.enablevariance: + # fmID='pre_dataEditing' + # else: + # fmID='dataEditing' + + # Todo: Add back in the ones we actually need from below, as we discover we need them + hddn = OrderedDict() + + + ## Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery + #hddn = OrderedDict( + # FormID = fmID, + # group = fd.group, + # submitID = '', + # scale = 'physic', + # additiveCheck = 'ON', + # showSNP = 'ON', + # showGenes = 'ON', + # method = None, + # parentsf14regression = 'OFF', + # stats_method = '1', + # chromosomes = '-1', + # topten = '', + # viewLegend = 'ON', + # intervalAnalystCheck = 'ON', + # valsHidden = 'OFF', + # database = '', + # criteria = None, + # MDPChoice = None, + # bootCheck = None, + # permCheck = None, + # applyVarianceSE = None, + # sampleNames = '_', + # sampleVals = '_', + # sampleVars = '_', + # otherStrainNames = '_', + # otherStrainVals = '_', + # otherStrainVars = '_', + # extra_attributes = '_', + # other_extra_attributes = '_', + # export_data = None + # ) + + #if fd.enablevariance: + # hddn['enablevariance']='ON' + #if fd.incparentsf1: + # hddn['incparentsf1']='ON' + + #if this_trait: + # hddn['fullname'] = str(this_trait) + # try: + # hddn['normalPlotTitle'] = this_trait.symbol + # hddn['normalPlotTitle'] += ": " + # hddn['normalPlotTitle'] += this_trait.name + # except: + # hddn['normalPlotTitle'] = str(this_trait.name) + # hddn['fromDataEditingPage'] = 1 + # if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet': + # hddn['trait_type'] = this_trait.dataset.type + # if this_trait.cellid: + # hddn['cellid'] = this_trait.cellid + # else: + # self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" % + # this_trait.mysqlid) + # heritability = self.cursor.fetchone() + # hddn['heritability'] = heritability + # + # hddn['attribute_names'] = "" + # + #hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor, + # groupName=fd.group) + # + #if fd.identification: + # hddn['identification'] = fd.identification + #else: + # hddn['identification'] = "Un-named trait" #If no identification, set identification to un-named self.dispTraitInformation(fd, "", hddn, this_trait) #Display trait information + function buttons @@ -186,27 +193,109 @@ class ShowTrait(templatePage): #trait_id = self.fd['trait_id'] #cell_id = self.fd.get('CellID') - this_trait = webqtlTrait(dataset=dataset, - name=trait_id, - cellid=cell_id) + this_trait = GeneralTrait(dataset=self.dataset.name, + name=self.trait_id, + cellid=self.cell_id) ##identification, etc. - self.fd.identification = '%s : %s' % (this_trait.dataset.shortname, trait_id) + self.identification = '%s : %s' % (self.dataset.shortname, self.trait_id) this_trait.returnURL = webqtlConfig.CGIDIR + webqtlConfig.SCRIPTFILE + '?FormID=showDatabase&database=%s\ - &ProbeSetID=%s&group=%s&parentsf1=on' %(dataset, trait_id, self.fd['group']) + &ProbeSetID=%s&group=%s&parentsf1=on' %(self.dataset, self.trait_id, self.dataset.group.name) - if cell_id: - self.fd.identification = '%s/%s'%(self.fd.identification, cell_id) - this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, cell_id) + if self.cell_id: + self.identification = '%s/%s'%(self.identification, self.cell_id) + this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, self.cell_id) - print("yellow1:", self.group) - this_trait.retrieveInfo() - print("yellow2:", self.group) - this_trait.retrieveData() - print("yellow3:", self.group) + print("yellow1:", self.dataset.group) + this_trait.retrieve_info() + print("yellow2:", self.dataset.group) + this_trait.retrieve_sample_data() + print("yellow3:", self.dataset.group) return this_trait + def read_data(self): + '''read user input data or from trait data and analysis form''' + + if incf1 == None: + incf1 = [] + + if not self.genotype: + self.readGenotype() + if not samplelist: + if incf1: + samplelist = self.f1list + self.samplelist + else: + samplelist = self.samplelist + + #print("before traitfiledata self.traitfile is:", pf(self.traitfile)) + + traitfiledata = getattr(self, "traitfile", None) + traitpastedata = getattr(self, "traitpaste", None) + variancefiledata = getattr(self, "variancefile", None) + variancepastedata = getattr(self, "variancepaste", None) + Nfiledata = getattr(self, "Nfile", None) + + #### Todo: Rewrite below when we get to someone submitting their own trait ##### + + def to_float(item): + try: + return float(item) + except ValueError: + return None + + print("bottle samplelist is:", samplelist) + if traitfiledata: + tt = traitfiledata.split() + values = map(webqtlUtil.StringAsFloat, tt) + elif traitpastedata: + tt = traitpastedata.split() + values = map(webqtlUtil.StringAsFloat, tt) + else: + print("mapping formdataasfloat") + #values = map(self.FormDataAsFloat, samplelist) + values = [to_float(getattr(self, key)) for key in samplelist] + print("rocket values is:", values) + + + if len(values) < len(samplelist): + values += [None] * (len(samplelist) - len(values)) + elif len(values) > len(samplelist): + values = values[:len(samplelist)] + print("now values is:", values) + + + if variancefiledata: + tt = variancefiledata.split() + variances = map(webqtlUtil.StringAsFloat, tt) + elif variancepastedata: + tt = variancepastedata.split() + variances = map(webqtlUtil.StringAsFloat, tt) + else: + variances = map(self.FormVarianceAsFloat, samplelist) + + if len(variances) < len(samplelist): + variances += [None]*(len(samplelist) - len(variances)) + elif len(variances) > len(samplelist): + variances = variances[:len(samplelist)] + + if Nfiledata: + tt = string.split(Nfiledata) + nsamples = map(webqtlUtil.IntAsFloat, tt) + if len(nsamples) < len(samplelist): + nsamples += [None]*(len(samplelist) - len(nsamples)) + else: + nsamples = map(self.FormNAsFloat, samplelist) + + ##values, variances, nsamples is obsolete + self.allTraitData = {} + for i, _sample in enumerate(samplelist): + if values[i] != None: + self.allTraitData[_sample] = webqtlCaseData( + _sample, values[i], variances[i], nsamples[i]) + print("allTraitData is:", pf(self.allTraitData)) + + def dispTraitInformation(self, fd, title1Body, hddn, this_trait): _Species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, group=fd.group) |