diff options
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x | wqflask/base/data_set.py | 420 | ||||
-rw-r--r-- | wqflask/base/generate_probesetfreeze_file.py | 31 | ||||
-rw-r--r-- | wqflask/base/species.py | 12 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 145 | ||||
-rwxr-xr-x | wqflask/base/webqtlConfig.py | 9 | ||||
-rwxr-xr-x | wqflask/base/webqtlConfigLocal.py | 16 |
6 files changed, 454 insertions, 179 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 50ef8f57..07fe9cd9 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -22,10 +22,14 @@ from __future__ import absolute_import, print_function, division import os +import math +import string +import collections -from flask import Flask, g +import json +import itertools -from htmlgen import HTMLgen2 as HT +from flask import Flask, g import reaper @@ -33,6 +37,8 @@ from base import webqtlConfig from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil +from utility.benchmark import Bench +from wqflask.my_pylmm.pyLMM import chunks from MySQLdb import escape_string as escape from pprint import pformat as pf @@ -41,29 +47,102 @@ from pprint import pformat as pf DS_NAME_MAP = {} def create_dataset(dataset_name): - #cursor = db_conn.cursor() - print("dataset_name:", dataset_name) + #print("dataset_name:", dataset_name) query = """ SELECT DBType.Name FROM DBList, DBType - WHERE DBList.Name = '%s' and + WHERE DBList.Name = '{}' and DBType.Id = DBList.DBTypeId - """ % (escape(dataset_name)) - print("query is: ", pf(query)) + """.format(escape(dataset_name)) + #print("query is: ", pf(query)) dataset_type = g.db.execute(query).fetchone().Name #dataset_type = cursor.fetchone()[0] - print("[blubber] dataset_type:", pf(dataset_type)) + #print("[blubber] dataset_type:", pf(dataset_type)) dataset_ob = DS_NAME_MAP[dataset_type] #dataset_class = getattr(data_set, dataset_ob) - print("dataset_ob:", dataset_ob) - print("DS_NAME_MAP:", pf(DS_NAME_MAP)) + #print("dataset_ob:", dataset_ob) + #print("DS_NAME_MAP:", pf(DS_NAME_MAP)) dataset_class = globals()[dataset_ob] return dataset_class(dataset_name) +def create_in_clause(items): + """Create an in clause for mysql""" + in_clause = ', '.join("'{}'".format(x) for x in mescape(*items)) + in_clause = '( {} )'.format(in_clause) + return in_clause + + +def mescape(*items): + """Multiple escape""" + escaped = [escape(str(item)) for item in items] + #print("escaped is:", escaped) + return escaped + + +class Markers(object): + """Todo: Build in cacheing so it saves us reading the same file more than once""" + def __init__(self, name): + json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) + self.markers = json.load(json_data_fh) + + def add_pvalues(self, p_values): + #print("length of self.markers:", len(self.markers)) + #print("length of p_values:", len(p_values)) + + # THIS IS only needed for the case when we are limiting the number of p-values calculated + if len(self.markers) < len(p_values): + self.markers = self.markers[:len(p_values)] + + for marker, p_value in itertools.izip(self.markers, p_values): + marker['p_value'] = p_value + print("p_value is:", marker['p_value']) + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + + + +class HumanMarkers(Markers): + + def __init__(self, name): + marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim')) + self.markers = [] + for line in marker_data_fh: + splat = line.strip().split() + marker = {} + marker['chr'] = int(splat[0]) + marker['name'] = splat[1] + marker['Mb'] = float(splat[3]) / 1000000 + self.markers.append(marker) + + #print("markers is: ", pf(self.markers)) + + + def add_pvalues(self, p_values): + #for marker, p_value in itertools.izip(self.markers, p_values): + # if marker['Mb'] <= 0 and marker['chr'] == 0: + # continue + # marker['p_value'] = p_value + # print("p_value is:", marker['p_value']) + # marker['lod_score'] = -math.log10(marker['p_value']) + # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + super(HumanMarkers, self).add_pvalues(p_values) + + with Bench("deleting markers"): + markers = [] + for marker in self.markers: + if not marker['Mb'] <= 0 and not marker['chr'] == 0: + markers.append(marker) + self.markers = markers + + class DatasetGroup(object): """ @@ -79,22 +158,41 @@ class DatasetGroup(object): if self.name == 'BXD300': self.name = "BXD" + self.f1list = None + self.parlist = None + self.get_f1_parent_strains() + #print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) + self.species = webqtlDatabaseFunction.retrieve_species(self.name) self.incparentsf1 = False - self.f1list = None - self.parlist = None self.allsamples = None + + + def get_markers(self): + #print("self.species is:", self.species) + if self.species == "human": + marker_class = HumanMarkers + else: + marker_class = Markers + self.markers = marker_class(self.name) + - #def read_genotype(self): - # self.read_genotype_file() - # - # if not self.genotype: # Didn'd succeed, so we try method 2 - # self.read_genotype_data() + def get_f1_parent_strains(self): + try: + # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py; + f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name] + except KeyError: + f1 = f12 = maternal = paternal = None + + if f1 and f12: + self.f1list = [f1, f12] + if maternal and paternal: + self.parlist = [maternal, paternal] def read_genotype_file(self): - '''read genotype from .geno file instead of database''' + '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': # self.group = 'BXD' # @@ -104,38 +202,24 @@ class DatasetGroup(object): #genotype_2 is Dataset Object with parents and f1 (not for intercross) genotype_1 = reaper.Dataset() - + # reaper barfs on unicode filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) genotype_1.read(full_filename) - print("Got to after read") - - try: - # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py; - f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name] - except KeyError: - f1 = f12 = maternal = paternal = None - - - if genotype_1.type == "group" and maternal and paternal: - genotype_2 = genotype_1.add(Mat=maternal, Pat=paternal) #, F1=_f1) + if genotype_1.type == "group" and self.parlist: + genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) else: genotype_2 = genotype_1 #determine default genotype object if self.incparentsf1 and genotype_1.type != "intercross": - self.genotype = genotype_2 + genotype = genotype_2 else: self.incparentsf1 = 0 - self.genotype = genotype_1 - - self.samplelist = list(self.genotype.prgy) + genotype = genotype_1 - if f1 and f12: - self.f1list = [f1, f12] - if maternal and paternal: - self.parlist = [maternal, paternal] + self.samplelist = list(genotype.prgy) class DataSet(object): @@ -159,10 +243,10 @@ class DataSet(object): self.retrieve_other_names() self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype + self.group.read_genotype_file() self.species = species.TheSpecies(self) - - - + + def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" return None @@ -209,14 +293,14 @@ class DataSet(object): self.name, self.name, self.name)) - print("query_args are:", query_args) + #print("query_args are:", query_args) - print(""" - SELECT Id, Name, FullName, ShortName - FROM %s - WHERE public > %s AND - (Name = '%s' OR FullName = '%s' OR ShortName = '%s') - """ % (query_args)) + #print(""" + # SELECT Id, Name, FullName, ShortName + # FROM %s + # WHERE public > %s AND + # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') + # """ % (query_args)) self.id, self.name, self.fullname, self.shortname = g.db.execute(""" SELECT Id, Name, FullName, ShortName @@ -227,11 +311,7 @@ class DataSet(object): #self.cursor.execute(query) #self.id, self.name, self.fullname, self.shortname = self.cursor.fetchone() - - - #def genHTML(self, Class='c0dd'): - # return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class), - # url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank") + class PhenotypeDataSet(DataSet): DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' @@ -291,6 +371,19 @@ class PhenotypeDataSet(DataSet): # (Urgently?) Need to write this pass + def get_trait_list(self): + query = """ + select PublishXRef.Id + from PublishXRef, PublishFreeze + where PublishFreeze.InbredSetId=PublishXRef.InbredSetId + and PublishFreeze.Id = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + trait_data = {} + for trait in results: + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + return trait_data + def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: if not this_trait.haveinfo: @@ -301,7 +394,7 @@ class PhenotypeDataSet(DataSet): continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description - this_trait.description_display = description + this_trait.description_display = unicode(description, "utf8") if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" @@ -359,7 +452,7 @@ class PhenotypeDataSet(DataSet): PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id Order BY Strain.Name - """ % (trait.name, self.id) + """ % (trait, self.id) results = g.db.execute(query).fetchall() return results @@ -399,6 +492,19 @@ class GenotypeDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) + + def get_trait_list(self): + query = """ + select Geno.Name + from Geno, GenoXRef + where GenoXRef.GenoId = Geno.Id + and GenoFreezeId = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + trait_data = {} + for trait in results: + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + return trait_data def get_trait_info(self, trait_list, species=None): for this_trait in trait_list: @@ -437,7 +543,7 @@ class GenotypeDataSet(DataSet): GenoData.StrainId = Strain.Id Order BY Strain.Name - """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait.name, self.name) + """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name) results = g.db.execute(query).fetchall() return results @@ -509,10 +615,95 @@ class MrnaAssayDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) + + def get_trait_list_1(self): + query = """ + select ProbeSet.Name + from ProbeSet, ProbeSetXRef + where ProbeSetXRef.ProbeSetId = ProbeSet.Id + and ProbeSetFreezeId = {} + """.format(escape(str(self.id))) + results = g.db.execute(query).fetchall() + #print("After get_trait_list query") + trait_data = {} + for trait in results: + print("Retrieving sample_data for ", trait[0]) + trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) + #print("After retrieve_sample_data") + return trait_data + + def get_trait_data(self): + self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + query = """ + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) + results = dict(g.db.execute(query).fetchall()) + sample_ids = [results[item] for item in self.samplelist] + + # MySQL limits the number of tables that can be used in a join to 61, + # so we break the sample ids into smaller chunks + # Postgres doesn't have that limit, so we can get rid of this after we transition + chunk_size = 50 + number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) + trait_sample_data = [] + for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): + + #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId + #tempTable = None + #if GeneId and db.type == "ProbeSet": + # if method == "3": + # tempTable = self.getTempLiteratureTable(species=species, + # input_species_geneid=GeneId, + # returnNumber=returnNumber) + # + # if method == "4" or method == "5": + # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, + # TissueProbeSetFreezeId=tissueProbeSetFreezeId, + # method=method, + # returnNumber=returnNumber) + + temp = ['T%s.value' % item for item in sample_ids_step] + query = "SELECT {}.Name,".format(escape(self.type)) + data_start_pos = 1 + query += string.join(temp, ', ') + query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type, + self.type, + self.type)) + + for item in sample_ids_step: + query += """ + left join {}Data as T{} on T{}.Id = {}XRef.DataId + and T{}.StrainId={}\n + """.format(*mescape(self.type, item, item, self.type, item, item)) + + query += """ + WHERE {}XRef.{}FreezeId = {}Freeze.Id + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(self.type, self.type, self.type, self.type, + self.name, self.type, self.type, self.type, self.type)) + results = g.db.execute(query).fetchall() + trait_sample_data.append(results) + + trait_count = len(trait_sample_data[0]) + self.trait_data = collections.defaultdict(list) + + # put all of the separate data together into a dictionary where the keys are + # trait names and values are lists of sample values + for trait_counter in range(trait_count): + trait_name = trait_sample_data[0][trait_counter][0] + for chunk_counter in range(int(number_chunks)): + self.trait_data[trait_name] += ( + trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) + def get_trait_info(self, trait_list=None, species=''): - # Note: setting trait_list to [] is probably not a great idea. + # Note: setting trait_list to [] is probably not a great idea. if not trait_list: trait_list = [] @@ -521,9 +712,7 @@ class MrnaAssayDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieveInfo(QTL=1) - if this_trait.symbol: - pass - else: + if not this_trait.symbol: this_trait.symbol = "N/A" #XZ, 12/08/2008: description @@ -531,60 +720,56 @@ class MrnaAssayDataSet(DataSet): description_string = str(this_trait.description).strip() target_string = str(this_trait.probe_target_description).strip() - description_display = '' - if len(description_string) > 1 and description_string != 'None': description_display = description_string else: description_display = this_trait.symbol - if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + if (len(description_display) > 1 and description_display != 'N/A' and + len(target_string) > 1 and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template this_trait.description_display = description_display - #print(" xxxxdd [%s]: %s" % (type(this_trait.description_display), description_display)) #XZ: trait_location_value is used for sorting trait_location_repr = 'N/A' trait_location_value = 1000000 if this_trait.chr and this_trait.mb: - try: - trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - except: - if this_trait.chr.upper() == 'X': - trait_location_value = 20*1000 + this_trait.mb - else: - trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) ) + #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y") + #This is so we can convert the location to a number used for sorting + trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb) + #try: + # trait_location_value = int(this_trait.chr)*1000 + this_trait.mb + #except ValueError: + # if this_trait.chr.upper() == 'X': + # trait_location_value = 20*1000 + this_trait.mb + # else: + # trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 + + # this_trait.mb) + + #ZS: Put this in function currently called "convert_location_to_value" + this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, + float(this_trait.mb)) this_trait.location_value = trait_location_value - #this_trait.trait_location_value = trait_location_value - #XZ, 01/12/08: This SQL query is much faster. + #Get mean expression value query = ( -"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' + """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSet.Name = '%s' """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - print("query is:", pf(query)) + #print("query is:", pf(query)) result = g.db.execute(query).fetchone() + + mean = result[0] if result else 0 - if result: - if result[0]: - mean = result[0] - else: - mean=0 - else: - mean = 0 - - #XZ, 06/05/2009: It is neccessary to turn on nowrap - this_trait.mean = repr = "%2.3f" % mean + this_trait.mean = "%2.3f" % mean #LRS and its location this_trait.LRS_score_repr = 'N/A' @@ -603,23 +788,39 @@ class MrnaAssayDataSet(DataSet): result = self.cursor.fetchone() if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + #if result[0] and result[1]: + # lrs_chr = result[0] + # lrs_mb = result[1] + lrs_chr, lrs_mb = result + #XZ: LRS_location_value is used for sorting + lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb) + + #try: + # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb) + #except: + # if lrs_chr.upper() == 'X': + # lrs_location_value = 20*1000 + float(lrs_mb) + # else: + # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 + + # float(lrs_mb)) + + this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_score_value = this_trait.lrs + this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) + + + def convert_location_to_value(self, chromosome, mb): + try: + location_value = int(chromosome)*1000 + float(mb) + except ValueError: + if chromosome.upper() == 'X': + location_value = 20*1000 + float(mb) + else: + location_value = (ord(str(chromosome).upper()[0])*1000 + + float(mb)) + + return location_value - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) - def get_sequence(self): query = """ SELECT @@ -633,9 +834,9 @@ class MrnaAssayDataSet(DataSet): ProbeSetFreeze.Name = %s """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() - return results[0] + def retrieve_sample_data(self, trait): query = """ SELECT @@ -652,7 +853,7 @@ class MrnaAssayDataSet(DataSet): ProbeSetData.StrainId = Strain.Id Order BY Strain.Name - """ % (escape(trait.name), escape(self.name)) + """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() return results @@ -725,7 +926,7 @@ class TempDataSet(DataSet): def geno_mrna_confidentiality(ob): dataset_table = ob.type + "Freeze" - print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) + #print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table) @@ -741,3 +942,4 @@ def geno_mrna_confidentiality(ob): if confidential: # Allow confidential data later NoConfindetialDataForYouTodaySorry + diff --git a/wqflask/base/generate_probesetfreeze_file.py b/wqflask/base/generate_probesetfreeze_file.py new file mode 100644 index 00000000..a0ff804b --- /dev/null +++ b/wqflask/base/generate_probesetfreeze_file.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import, print_function, division +import os +import math + +import json +import itertools + +from flask import Flask, g + +from base import webqtlConfig +from dbFunction import webqtlDatabaseFunction +from utility import webqtlUtil + +from MySQLdb import escape_string as escape +from pprint import pformat as pf + + +query = """ select ProbeSet.Name + from ProbeSetXRef, + ProbeSetFreeze, + ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and + ProbeSetFreeze.Name = "EPFLMouseMuscleCDRMA1211" and + ProbeSetXRef.ProbeSetId = ProbeSet.Id; + """ + +markers = g.db.execute(query).fetchall() +print("markers: ", pf(markers)) + +if __name__ == '__main__': + main()
\ No newline at end of file diff --git a/wqflask/base/species.py b/wqflask/base/species.py index 9d4cac4c..191f4535 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -16,8 +16,7 @@ class TheSpecies(object): print("self.dataset is:", pf(self.dataset.__dict__)) self.chromosomes = Chromosomes(self.dataset) self.genome_mb_length = self.chromosomes.get_genome_mb_length() - - + #@property #def chromosomes(self): # chromosomes = [("All", -1)] @@ -31,7 +30,8 @@ class TheSpecies(object): # return chromosomes class IndChromosome(object): - def __init__(self, length): + def __init__(self, name, length): + self.name = name self.length = length @property @@ -50,7 +50,7 @@ class Chromosomes(object): results = g.db.execute(""" Select - Chr_Length.Name, Length from Chr_Length, InbredSet + Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet where Chr_Length.SpeciesId = InbredSet.SpeciesId AND InbredSet.Name = %s @@ -59,10 +59,10 @@ class Chromosomes(object): print("bike:", results) for item in results: - self.chromosomes[item.Name] = IndChromosome(item.Length) + self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) self.set_mb_graph_interval() - self.get_cm_length_list() + #self.get_cm_length_list() def set_mb_graph_interval(self): diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 241bf2ab..db76ddea 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,6 +1,8 @@ from __future__ import absolute_import, division, print_function import string +import resource + from htmlgen import HTMLgen2 as HT @@ -15,22 +17,38 @@ from pprint import pformat as pf from flask import Flask, g -class GeneralTrait: +def print_mem(stage=""): + mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print("{}: {}".format(stage, mem/1024)) + +class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, Published phenotype, genotype, or user input trait """ - def __init__(self, **kw): - print("in GeneralTrait") - self.dataset = kw.get('dataset') # database name + def __init__(self, get_qtl_info=False, **kw): + # xor assertion + assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + if kw.get('dataset_name'): + self.dataset = create_dataset(kw.get('dataset_name')) + else: + self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet self.data = kw.get('data', {}) + + # Sets defaultst + self.locus = None + self.lrs = None + self.pvalue = None + self.mean = None + self.num_overlap = None + if kw.get('fullname'): name2 = value.split("::") @@ -39,13 +57,12 @@ class GeneralTrait: # self.cellid is set to None above elif len(name2) == 3: self.dataset, self.name, self.cellid = name2 - - self.dataset = create_dataset(self.dataset) # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary - self.retrieve_info() + self.retrieve_info(get_qtl_info=get_qtl_info) self.retrieve_sample_data() + def get_name(self): @@ -78,7 +95,7 @@ class GeneralTrait: #desc = self.handle_pca(desc) stringy = desc return stringy - + def display_name(self): @@ -208,7 +225,7 @@ class GeneralTrait: # ''' % (self.cellid, self.name, self.dataset.name) # #else: - results = self.dataset.retrieve_sample_data(self) + results = self.dataset.retrieve_sample_data(self.name) # Todo: is this necessary? If not remove self.data.clear() @@ -229,7 +246,7 @@ class GeneralTrait: #def items(self): # return self.__dict__.items() - def retrieve_info(self, QTL=False): + def retrieve_info(self, get_qtl_info=False): assert self.dataset, "Dataset doesn't exist" if self.dataset.type == 'Publish': query = """ @@ -251,7 +268,7 @@ class GeneralTrait: PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - traitInfo = g.db.execute(query).fetchone() + trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif self.dataset.type == 'ProbeSet': @@ -268,8 +285,8 @@ class GeneralTrait: """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name)) - traitInfo = g.db.execute(query).fetchone() - print("traitInfo is: ", pf(traitInfo)) + trait_info = g.db.execute(query).fetchone() + #print("trait_info is: ", pf(trait_info)) #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif self.dataset.type == 'Geno': @@ -286,23 +303,24 @@ class GeneralTrait: """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name)) - traitInfo = g.db.execute(query).fetchone() - print("traitInfo is: ", pf(traitInfo)) + trait_info = g.db.execute(query).fetchone() + #print("trait_info is: ", pf(trait_info)) else: #Temp type query = """SELECT %s FROM %s WHERE Name = %s """ % (string.join(self.dataset.display_fields,','), self.dataset.type, self.name) - traitInfo = g.db.execute(query).fetchone() + trait_info = g.db.execute(query).fetchone() #self.cursor.execute(query) - #traitInfo = self.cursor.fetchone() - if traitInfo: + #trait_info = self.cursor.fetchone() + if trait_info: self.haveinfo = True #XZ: assign SQL query result to trait attributes. for i, field in enumerate(self.dataset.display_fields): - setattr(self, field, traitInfo[i]) + print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i])) + setattr(self, field, trait_info[i]) if self.dataset.type == 'Publish': self.confidential = 0 @@ -310,55 +328,76 @@ class GeneralTrait: self.confidential = 1 self.homologeneid = None + + print("self.geneid is:", self.geneid) + print(" type:", type(self.geneid)) + print("self.dataset.group.name is:", self.dataset.group.name) if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. #XZ: The geneid values in database should be cleaned up. - try: - junk = float(self.geneid) - geneidIsNumber = 1 - except: - geneidIsNumber = 0 - - if geneidIsNumber: - query = """ - SELECT - HomologeneId - FROM - Homologene, Species, InbredSet - WHERE - Homologene.GeneId =%s AND - InbredSet.Name = '%s' AND - InbredSet.SpeciesId = Species.Id AND - Species.TaxonomyId = Homologene.TaxonomyId - """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) - result = g.db.execute(query).fetchone() - else: - result = None + #try: + # float(self.geneid) + # geneidIsNumber = True + #except ValueError: + # geneidIsNumber = False + + #if geneidIsNumber: + + + query = """ + SELECT + HomologeneId + FROM + Homologene, Species, InbredSet + WHERE + Homologene.GeneId =%s AND + InbredSet.Name = '%s' AND + InbredSet.SpeciesId = Species.Id AND + Species.TaxonomyId = Homologene.TaxonomyId + """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) + print("-> query is:", query) + result = g.db.execute(query).fetchone() + #else: + # result = None if result: self.homologeneid = result[0] - if QTL: + if get_qtl_info: if self.dataset.type == 'ProbeSet' and not self.cellid: - traitQTL = g.db.execute(""" + query = """ SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean FROM ProbeSetXRef, ProbeSet WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSet.Name = "%s" AND - ProbeSetXRef.ProbeSetFreezeId =%s - """, (self.name, self.dataset.id)).fetchone() + ProbeSet.Name = "{}" AND + ProbeSetXRef.ProbeSetFreezeId ={} + """.format(self.name, self.dataset.id) + trait_qtl = g.db.execute(query).fetchone() #self.cursor.execute(query) - #traitQTL = self.cursor.fetchone() - if traitQTL: - self.locus, self.lrs, self.pvalue, self.mean = traitQTL + #trait_qtl = self.cursor.fetchone() + if trait_qtl: + self.locus, self.lrs, self.pvalue, self.mean = trait_qtl + if self.locus: + query = """ + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '{}' and + Geno.Name = '{}' and + Geno.SpeciesId = Species.Id + """.format(self.dataset.group.species, self.locus) + print("query is:", query) + result = g.db.execute(query).fetchone() + self.locus_chr = result[0] + self.locus_mb = result[1] else: - self.locus = self.lrs = self.pvalue = self.mean = "" + self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = "" + + if self.dataset.type == 'Publish': - traitQTL = g.db.execute(""" + trait_qtl = g.db.execute(""" SELECT PublishXRef.Locus, PublishXRef.LRS FROM @@ -369,9 +408,9 @@ class GeneralTrait: PublishFreeze.Id =%s """, (self.name, self.dataset.id)).fetchone() #self.cursor.execute(query) - #traitQTL = self.cursor.fetchone() - if traitQTL: - self.locus, self.lrs = traitQTL + #trait_qtl = self.cursor.fetchone() + if trait_qtl: + self.locus, self.lrs = trait_qtl else: self.locus = self.lrs = "" else: diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 755595e0..a811c3cd 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -52,19 +52,22 @@ ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?sp SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'web/' +PYLMM_PATH = '/home/zas1024/' +SNP_PATH = '/mnt/xvdf1/snps/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' -TMPDIR = '/tmp/' +TMPDIR = HTMLPATH + 'tmp/' GENODIR = HTMLPATH + 'genotypes/' +NEWGENODIR = HTMLPATH + 'new_genotypes/' GENO_ARCHIVE_DIR = GENODIR + 'archive/' TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/' CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/' ChangableHtmlPath = GNROOT + 'web/' SITENAME = 'GN' -PORTADDR = "http://132.192.47.32" -BASEHREF = '<base href="http://132.192.47.32/">' +PORTADDR = "http://50.16.251.170" +BASEHREF = '<base href="http://50.16.251.170/">' INFOPAGEHREF = '/dbdoc/%s.html' GLOSSARYFILE = "/glossary.html" CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py index 84686234..abaeff93 100755 --- a/wqflask/base/webqtlConfigLocal.py +++ b/wqflask/base/webqtlConfigLocal.py @@ -2,18 +2,18 @@ # Environment Variables - private ######################################### -MYSQL_SERVER = 'localhost' -DB_NAME = 'db_webqtl_zas1024' +MYSQL_SERVER = 'gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com' +DB_NAME = 'db_webqtl' DB_USER = 'webqtl' -DB_PASSWD = 'webqtl' +DB_PASSWD = 'f2ZypIflRM' -MYSQL_UPDSERVER = 'localhost' -DB_UPDNAME = 'db_webqtl_zas1024' +MYSQL_UPDSERVER = 'gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com' +DB_UPDNAME = 'db_webqtl' DB_UPDUSER = 'webqtl' -DB_UPDPASSWD = 'webqtl' +DB_UPDPASSWD = 'f2ZypIflRM' -GNROOT = '/home/zas1024/gn/' -ROOT_URL = 'http://alexandria.uthsc.edu:91/' +GNROOT = '/home/zas1024/gene/' +ROOT_URL = 'http://50.16.251.170' PythonPath = '/usr/bin/python' PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' |