diff options
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x | wqflask/base/data_set.py | 202 | ||||
-rwxr-xr-x | wqflask/base/mrna_assay_tissue_data.py | 7 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 52 | ||||
-rwxr-xr-x | wqflask/base/webqtlCaseData.py | 2 | ||||
-rwxr-xr-x | wqflask/base/webqtlConfig.py | 2 |
5 files changed, 188 insertions, 77 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 489bd374..d6a46c2e 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -29,6 +29,7 @@ import json import gzip import cPickle as pickle import itertools +from operator import itemgetter from redis import Redis Redis = Redis() @@ -42,7 +43,7 @@ from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench -from wqflask.my_pylmm.pyLMM import chunks +from utility import chunks from maintenance import get_group_samplelists @@ -88,7 +89,7 @@ class Dataset_Types(object): for group in data['datasets'][species]: for dataset_type in data['datasets'][species][group]: for dataset in data['datasets'][species][group][dataset_type]: - print("dataset is:", dataset) + #print("dataset is:", dataset) short_dataset_name = dataset[0] if dataset_type == "Phenotypes": @@ -162,8 +163,6 @@ class Markers(object): for marker in markers: if (marker['chr'] != "X") and (marker['chr'] != "Y"): marker['chr'] = int(marker['chr']) - #else: - # marker['chr'] = 20 print("Mb:", marker['Mb']) marker['Mb'] = float(marker['Mb']) @@ -278,7 +277,7 @@ class DatasetGroup(object): """ def __init__(self, dataset): """This sets self.group and self.group_id""" - print("dataset name:", dataset.name) + print("DATASET NAME2:", dataset.name) self.name, self.id = g.db.execute(dataset.query_for_group).fetchone() if self.name == 'BXD300': self.name = "BXD" @@ -292,6 +291,7 @@ class DatasetGroup(object): self.incparentsf1 = False self.allsamples = None + self._datasets = None def get_specified_markers(self, markers = []): self.markers = HumanMarkers(self.name, markers) @@ -305,6 +305,75 @@ class DatasetGroup(object): self.markers = marker_class(self.name) + def datasets(self): + key = "group_dataset_menu:v2:" + self.name + print("key is2:", key) + #with Bench("Loading cache"): + # result = Redis.get(key) + #if result: + # self._datasets = pickle.loads(result) + # return self._datasets + + dataset_menu = [] + print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH) + print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH)) + results = g.db.execute(''' + (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name + FROM PublishFreeze,InbredSet + WHERE PublishFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name = %s + and PublishFreeze.public > %s) + UNION + (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name + FROM GenoFreeze, InbredSet + WHERE GenoFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name = %s + and GenoFreeze.public > %s) + UNION + (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name + FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id + and ProbeFreeze.TissueId = Tissue.Id + and ProbeFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name like %s + and ProbeSetFreeze.public > %s + ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId) + ''', (self.name, webqtlConfig.PUBLICTHRESH, + self.name, webqtlConfig.PUBLICTHRESH, + "%" + self.name + "%", webqtlConfig.PUBLICTHRESH)) + + the_results = results.fetchall() + + #for tissue_name, dataset in itertools.groupby(the_results, itemgetter(0)): + for dataset_item in the_results: + tissue_name = dataset_item[0] + dataset = dataset_item[1] + dataset_short = dataset_item[2] + if tissue_name in ['#PublishFreeze', '#GenoFreeze']: + dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) + else: + dataset_sub_menu = [item[1:] for item in dataset] + + tissue_already_exists = False + tissue_position = None + for i, tissue_dict in enumerate(dataset_menu): + if tissue_dict['tissue'] == tissue_name: + tissue_already_exists = True + tissue_position = i + break + + if tissue_already_exists: + print("dataset_menu:", dataset_menu[i]['datasets']) + dataset_menu[i]['datasets'].append((dataset, dataset_short)) + else: + dataset_menu.append(dict(tissue=tissue_name, + datasets=[(dataset, dataset_short)])) + + Redis.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL)) + Redis.expire(key, 60*5) + self._datasets = dataset_menu + + return self._datasets def get_f1_parent_strains(self): try: @@ -319,7 +388,7 @@ class DatasetGroup(object): self.parlist = [maternal, paternal] def get_samplelist(self): - key = "samplelist:v4:" + self.name + key = "samplelist:v2:" + self.name print("key is:", key) with Bench("Loading cache"): result = Redis.get(key) @@ -332,14 +401,29 @@ class DatasetGroup(object): print(" self.samplelist: ", self.samplelist) else: print("Cache not hit") - try: - self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno") - except IOError: + + from utility.tools import plink_command + PLINK_PATH,PLINK_COMMAND = plink_command() + + geno_file_path = webqtlConfig.GENODIR+self.name+".geno" + plink_file_path = PLINK_PATH+"/"+self.name+".fam" + + if os.path.isfile(plink_file_path): + self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) + elif os.path.isfile(geno_file_path): + self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) + else: self.samplelist = None print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) + def all_samples_ordered(self): + result = [] + lists = (self.parlist, self.f1list, self.samplelist) + [result.extend(l) for l in lists if l] + return result + def read_genotype_file(self): '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': @@ -434,6 +518,8 @@ class DataSet(object): self.group.get_samplelist() self.species = species.TheSpecies(self) + print("TESTING!!!") + def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" @@ -473,29 +559,39 @@ class DataSet(object): This is not meant to retrieve the data set info if no name at all is passed. """ - - query_args = tuple(escape(x) for x in ( - (self.type + "Freeze"), - str(webqtlConfig.PUBLICTHRESH), - self.name, - self.name, - self.name)) - print("query_args are:", query_args) - - #print(""" - # SELECT Id, Name, FullName, ShortName - # FROM %s - # WHERE public > %s AND - # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') - # """ % (query_args)) try: - self.id, self.name, self.fullname, self.shortname = g.db.execute(""" - SELECT Id, Name, FullName, ShortName - FROM %s - WHERE public > %s AND - (Name = '%s' OR FullName = '%s' OR ShortName = '%s') - """ % (query_args)).fetchone() + if self.type == "ProbeSet": + query_args = tuple(escape(x) for x in ( + str(webqtlConfig.PUBLICTHRESH), + self.name, + self.name, + self.name)) + + self.id, self.name, self.fullname, self.shortname, self.tissue = g.db.execute(""" + SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, Tissue.Name + FROM ProbeSetFreeze, ProbeFreeze, Tissue + WHERE ProbeSetFreeze.public > %s AND + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + ProbeFreeze.TissueId = Tissue.Id AND + (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s') + """ % (query_args)).fetchone() + else: + query_args = tuple(escape(x) for x in ( + (self.type + "Freeze"), + str(webqtlConfig.PUBLICTHRESH), + self.name, + self.name, + self.name)) + + self.tissue = "N/A" + self.id, self.name, self.fullname, self.shortname = g.db.execute(""" + SELECT Id, Name, FullName, ShortName + FROM %s + WHERE public > %s AND + (Name = '%s' OR FullName = '%s' OR ShortName = '%s') + """ % (query_args)).fetchone() + except TypeError: print("Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass @@ -633,14 +729,14 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields = ['', - 'ID', + self.header_fields = ['Index', + 'Record', 'Description', 'Authors', 'Year', 'Max LRS', 'Max LRS Location', - 'Add. Effect<a href="http://genenetwork.org//glossary.html#A" target="_blank"><sup style="color:#f00"> ?</sup></a>'] + 'Additive Effect'] self.type = 'Publish' @@ -719,7 +815,6 @@ class PhenotypeDataSet(DataSet): Geno.Name = %s and Geno.SpeciesId = Species.Id """, (species, this_trait.locus)).fetchone() - #result = self.cursor.fetchone() if result: if result[0] and result[1]: @@ -737,7 +832,7 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb)) + this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): query = """ @@ -753,11 +848,11 @@ class PhenotypeDataSet(DataSet): WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND - PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id + PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id Order BY Strain.Name - """ % (trait, self.id) - results = g.db.execute(query).fetchall() + """ + results = g.db.execute(query, (trait, self.id)).fetchall() return results @@ -777,7 +872,7 @@ class GenotypeDataSet(DataSet): 'sequence'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Location'] @@ -828,7 +923,7 @@ class GenotypeDataSet(DataSet): else: trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) ) + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) ) this_trait.location_value = trait_location_value def retrieve_sample_data(self, trait): @@ -840,15 +935,17 @@ class GenotypeDataSet(DataSet): left join GenoSE on (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) WHERE - Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND + Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoFreeze.Name = '%s' AND + GenoFreeze.Name = %s AND GenoXRef.DataId = GenoData.Id AND GenoData.StrainId = Strain.Id Order BY Strain.Name - """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name) - results = g.db.execute(query).fetchall() + """ + results = g.db.execute(query, + (webqtlDatabaseFunction.retrieve_species_id(self.group.name), + trait, self.name)).fetchall() return results @@ -893,15 +990,15 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields = ['', - 'ID', + self.header_fields = ['Index', + 'Record', 'Symbol', 'Description', 'Location', - 'Mean Expr', + 'Mean', 'Max LRS', 'Max LRS Location', - 'Add. Effect<a href="http://genenetwork.org//glossary.html#A" target="_blank"><sup style="color:#f00"> ?</sup></a>'] + 'Additive Effect'] # Todo: Obsolete or rename this field self.type = 'ProbeSet' @@ -1055,7 +1152,7 @@ class MrnaAssayDataSet(DataSet): # this_trait.mb) #ZS: Put this in function currently called "convert_location_to_value" - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb)) this_trait.location_value = trait_location_value @@ -1074,7 +1171,8 @@ class MrnaAssayDataSet(DataSet): mean = result[0] if result else 0 - this_trait.mean = "%2.3f" % mean + if mean: + this_trait.mean = "%2.3f" % mean #LRS and its location this_trait.LRS_score_repr = 'N/A' @@ -1111,7 +1209,7 @@ class MrnaAssayDataSet(DataSet): this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) + this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb)) def convert_location_to_value(self, chromosome, mb): @@ -1159,7 +1257,7 @@ class MrnaAssayDataSet(DataSet): Strain.Name """ % (escape(trait), escape(self.name)) results = g.db.execute(query).fetchall() - print("RETRIEVED RESULTS HERE:", results) + #print("RETRIEVED RESULTS HERE:", results) return results diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 1a05fce7..54a7ce8e 100755 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -40,7 +40,6 @@ class MrnaAssayTissueData(object): # with highest mean value # Due to the limit size of TissueProbeSetFreezeId table in DB, # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) - #print("len(gene_symbols): ", len(gene_symbols)) if len(gene_symbols) == 0: query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol @@ -49,6 +48,8 @@ class MrnaAssayTissueData(object): else: in_clause = db_tools.create_in_clause(gene_symbols) + #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower + query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; @@ -58,8 +59,8 @@ class MrnaAssayTissueData(object): for result in results: symbol = result[0] - if symbol in gene_symbols: - #gene_symbols.append(symbol) + if symbol.lower() in [gene_symbol.lower() for gene_symbol in gene_symbols]: + #gene_symbols.append(symbol) symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 58bed865..ff80795c 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -53,7 +53,8 @@ class GeneralTrait(object): self.pvalue = None self.mean = None self.num_overlap = None - + self.strand_probe = None + self.symbol = None if kw.get('fullname'): name2 = value.split("::") @@ -250,14 +251,7 @@ class GeneralTrait(object): # Todo: is this necessary? If not remove self.data.clear() - if self.dataset.group.parlist: - all_samples_ordered = (self.dataset.group.parlist + - self.dataset.group.f1list + - self.dataset.group.samplelist) - elif self.dataset.group.f1list: - all_samples_ordered = self.dataset.group.f1list + self.dataset.group.samplelist - else: - all_samples_ordered = self.dataset.group.samplelist + all_samples_ordered = self.dataset.group.all_samples_ordered() if results: for item in results: @@ -297,7 +291,7 @@ class GeneralTrait(object): PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - print("query is:", query) + print("query is:", query) trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name @@ -336,10 +330,10 @@ class GeneralTrait(object): trait_info = g.db.execute(query).fetchone() #print("trait_info is: ", pf(trait_info)) else: #Temp type - query = """SELECT %s FROM %s WHERE Name = %s - """ % (string.join(self.dataset.display_fields,','), - self.dataset.type, self.name) - trait_info = g.db.execute(query).fetchone() + query = """SELECT %s FROM %s WHERE Name = %s""" + trait_info = g.db.execute(query, + (string.join(self.dataset.display_fields,','), + self.dataset.type, self.name)).fetchone() if trait_info: self.haveinfo = True @@ -422,6 +416,8 @@ class GeneralTrait(object): if result: self.locus_chr = result[0] self.locus_mb = result[1] + else: + self.locus = self.locus_chr = self.locus_mb = "" else: self.locus = self.locus_chr = self.locus_mb = "" else: @@ -533,12 +529,27 @@ class GeneralTrait(object): return setDescription @property + def name_header_fmt(self): + '''Return a human-readable name for use in page header''' + if self.dataset.type == 'ProbeSet': + return self.symbol + elif self.dataset.type == 'Geno': + return self.name + elif self.dataset.type == 'Publish': + return self.post_publication_abbreviation + else: + return "unnamed" + + @property def description_fmt(self): '''Return a text formated description''' - if self.description: - formatted = self.description - if self.probe_target_description: - formatted += "; " + self.probe_target_description + if self.dataset.type == 'ProbeSet': + if self.description: + formatted = self.description + if self.probe_target_description: + formatted += "; " + self.probe_target_description + elif self.dataset.type == 'Publish': + formatted = self.post_publication_description else: formatted = "Not available" return formatted.capitalize() @@ -549,6 +560,9 @@ class GeneralTrait(object): if self.alias: alias = string.replace(self.alias, ";", " ") alias = string.join(string.split(alias), ", ") + else: + alias = 'Not available' + return alias @@ -649,4 +663,4 @@ def get_sample_data(): # jsonable_sample_data[sample] = trait_ob.data[sample].value # #return jsonable_sample_data -
\ No newline at end of file + diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 5927b0f4..42763aed 100755 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -34,8 +34,6 @@ class webqtlCaseData(object): self.value = value # Trait Value self.variance = variance # Trait Variance self.num_cases = num_cases # Number of individuals/cases - self.prob_plot_value = None # Ordered value for probability plot; this is sort of wrong but not sure how else to do this - self.z_score = None self.extra_attributes = None self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) self.outlier = None # Not set to True/False until later diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 48d8cd0a..330fec56 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -53,7 +53,7 @@ GNROOT = "/home/zas1024/gene/" # Will remove this and dependent items later SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'genotype_files/' -PYLMM_PATH = '/home/zas1024/plink/' +PYLMM_PATH = '/home/zas1024/plink_gemma/' SNP_PATH = '/home/zas1024/snps/' IMGDIR = GNROOT + '/wqflask/wqflask/images/' IMAGESPATH = HTMLPATH + 'images/' |