diff options
author | Lei Yan | 2015-07-10 20:12:00 +0000 |
---|---|---|
committer | Lei Yan | 2015-07-10 20:12:00 +0000 |
commit | aa159a17785cc415e81346963aa76f05f5f9d4ad (patch) | |
tree | ae0502dac39d8cf20115b58817df4931a03b727c /wqflask/base | |
parent | 239c5ff97a88bd9ae3c439ca244daca2696fe68b (diff) | |
parent | 840285e3533790760b763aaa43d3099f9b0a5d69 (diff) | |
download | genenetwork2-aa159a17785cc415e81346963aa76f05f5f9d4ad.tar.gz |
Merge https://github.com/genenetwork/genenetwork2
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x | wqflask/base/data_set.py | 105 | ||||
-rwxr-xr-x | wqflask/base/mrna_assay_tissue_data.py | 6 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 21 | ||||
-rwxr-xr-x | wqflask/base/webqtlCaseData.py | 2 | ||||
-rwxr-xr-x | wqflask/base/webqtlConfig.py | 2 |
5 files changed, 99 insertions, 37 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 1cd57b4b..414cc71a 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -29,6 +29,7 @@ import json import gzip import cPickle as pickle import itertools +from operator import itemgetter from redis import Redis Redis = Redis() @@ -292,6 +293,7 @@ class DatasetGroup(object): self.incparentsf1 = False self.allsamples = None + self._datasets = None def get_specified_markers(self, markers = []): self.markers = HumanMarkers(self.name, markers) @@ -305,6 +307,56 @@ class DatasetGroup(object): self.markers = marker_class(self.name) + def datasets(self): + key = "group_dataset_menu:v1:" + self.name + print("key is:", key) + with Bench("Loading cache"): + result = Redis.get(key) + if result: + self._datasets = pickle.loads(result) + return self._datasets + + dataset_menu = [] + print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH) + print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH)) + results = g.db.execute(''' + (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name + FROM PublishFreeze,InbredSet + WHERE PublishFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name = %s + and PublishFreeze.public > %s) + UNION + (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name + FROM GenoFreeze, InbredSet + WHERE GenoFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name = %s + and GenoFreeze.public > %s) + UNION + (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name + FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id + and ProbeFreeze.TissueId = Tissue.Id + and ProbeFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name like %s + and ProbeSetFreeze.public > %s + ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId) + ''', (self.name, webqtlConfig.PUBLICTHRESH, + self.name, webqtlConfig.PUBLICTHRESH, + "%" + self.name + "%", webqtlConfig.PUBLICTHRESH)) + + for tissue_name, dataset in itertools.groupby(results.fetchall(), itemgetter(0)): + if tissue_name in ['#PublishFreeze', '#GenoFreeze']: + for item in dataset: + dataset_menu.append(dict(tissue=None, datasets=[item[1:]])) + else: + dataset_sub_menu = [item[1:] for item in dataset] + dataset_menu.append(dict(tissue=tissue_name, + datasets=dataset_sub_menu)) + + Redis.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL)) + Redis.expire(key, 60*5) + self._datasets = dataset_menu + return self._datasets def get_f1_parent_strains(self): try: @@ -319,7 +371,7 @@ class DatasetGroup(object): self.parlist = [maternal, paternal] def get_samplelist(self): - key = "samplelist:v4:" + self.name + key = "samplelist:v2:" + self.name print("key is:", key) with Bench("Loading cache"): result = Redis.get(key) @@ -332,14 +384,29 @@ class DatasetGroup(object): print(" self.samplelist: ", self.samplelist) else: print("Cache not hit") - try: - self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno") - except IOError: + + from utility.tools import plink_command + PLINK_PATH,PLINK_COMMAND = plink_command() + + geno_file_path = webqtlConfig.GENODIR+self.name+".geno" + plink_file_path = PLINK_PATH+"/"+self.name+".fam" + + if os.path.isfile(plink_file_path): + self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) + elif os.path.isfile(geno_file_path): + self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) + else: self.samplelist = None print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) + def all_samples_ordered(self): + result = [] + lists = (self.parlist, self.f1list, self.samplelist) + [result.extend(l) for l in lists if l] + return result + def read_genotype_file(self): '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': @@ -633,7 +700,7 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Description', 'Authors', @@ -737,7 +804,7 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb)) + this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): query = """ @@ -753,11 +820,11 @@ class PhenotypeDataSet(DataSet): WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND - PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id + PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id Order BY Strain.Name - """ % (trait, self.id) - results = g.db.execute(query).fetchall() + """ + results = g.db.execute(query, (trait, self.id)).fetchall() return results @@ -777,7 +844,7 @@ class GenotypeDataSet(DataSet): 'sequence'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Location'] @@ -828,7 +895,7 @@ class GenotypeDataSet(DataSet): else: trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) ) + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) ) this_trait.location_value = trait_location_value def retrieve_sample_data(self, trait): @@ -840,15 +907,17 @@ class GenotypeDataSet(DataSet): left join GenoSE on (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) WHERE - Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND + Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoFreeze.Name = '%s' AND + GenoFreeze.Name = %s AND GenoXRef.DataId = GenoData.Id AND GenoData.StrainId = Strain.Id Order BY Strain.Name - """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name) - results = g.db.execute(query).fetchall() + """ + results = g.db.execute(query, + (webqtlDatabaseFunction.retrieve_species_id(self.group.name), + trait, self.name)).fetchall() return results @@ -893,7 +962,7 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Symbol', 'Description', @@ -1055,7 +1124,7 @@ class MrnaAssayDataSet(DataSet): # this_trait.mb) #ZS: Put this in function currently called "convert_location_to_value" - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb)) this_trait.location_value = trait_location_value @@ -1111,7 +1180,7 @@ class MrnaAssayDataSet(DataSet): this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) + this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb)) def convert_location_to_value(self, chromosome, mb): diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 1a05fce7..b2c0448a 100755 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -51,15 +51,15 @@ class MrnaAssayTissueData(object): query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol - and t.Mean = x.maxmean; + and t.Mean = x.maxmean;http://docs.python.org/2/library/string.html?highlight=lower#string.lower '''.format(in_clause) results = g.db.execute(query).fetchall() for result in results: symbol = result[0] - if symbol in gene_symbols: - #gene_symbols.append(symbol) + if symbol.lower() in [gene_symbol.lower() for gene_symbol in gene_symbols]: + #gene_symbols.append(symbol) symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 8930c917..ff80795c 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -251,14 +251,7 @@ class GeneralTrait(object): # Todo: is this necessary? If not remove self.data.clear() - if self.dataset.group.parlist: - all_samples_ordered = (self.dataset.group.parlist + - self.dataset.group.f1list + - self.dataset.group.samplelist) - elif self.dataset.group.f1list: - all_samples_ordered = self.dataset.group.f1list + self.dataset.group.samplelist - else: - all_samples_ordered = self.dataset.group.samplelist + all_samples_ordered = self.dataset.group.all_samples_ordered() if results: for item in results: @@ -298,7 +291,7 @@ class GeneralTrait(object): PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - print("query is:", query) + print("query is:", query) trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name @@ -337,10 +330,10 @@ class GeneralTrait(object): trait_info = g.db.execute(query).fetchone() #print("trait_info is: ", pf(trait_info)) else: #Temp type - query = """SELECT %s FROM %s WHERE Name = %s - """ % (string.join(self.dataset.display_fields,','), - self.dataset.type, self.name) - trait_info = g.db.execute(query).fetchone() + query = """SELECT %s FROM %s WHERE Name = %s""" + trait_info = g.db.execute(query, + (string.join(self.dataset.display_fields,','), + self.dataset.type, self.name)).fetchone() if trait_info: self.haveinfo = True @@ -423,6 +416,8 @@ class GeneralTrait(object): if result: self.locus_chr = result[0] self.locus_mb = result[1] + else: + self.locus = self.locus_chr = self.locus_mb = "" else: self.locus = self.locus_chr = self.locus_mb = "" else: diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 5927b0f4..42763aed 100755 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -34,8 +34,6 @@ class webqtlCaseData(object): self.value = value # Trait Value self.variance = variance # Trait Variance self.num_cases = num_cases # Number of individuals/cases - self.prob_plot_value = None # Ordered value for probability plot; this is sort of wrong but not sure how else to do this - self.z_score = None self.extra_attributes = None self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) self.outlier = None # Not set to True/False until later diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 48d8cd0a..330fec56 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -53,7 +53,7 @@ GNROOT = "/home/zas1024/gene/" # Will remove this and dependent items later SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'genotype_files/' -PYLMM_PATH = '/home/zas1024/plink/' +PYLMM_PATH = '/home/zas1024/plink_gemma/' SNP_PATH = '/home/zas1024/snps/' IMGDIR = GNROOT + '/wqflask/wqflask/images/' IMAGESPATH = HTMLPATH + 'images/' |