diff options
Diffstat (limited to 'wqflask/base/data_set.py')
-rwxr-xr-x | wqflask/base/data_set.py | 105 |
1 files changed, 87 insertions, 18 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 1cd57b4b..414cc71a 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -29,6 +29,7 @@ import json import gzip import cPickle as pickle import itertools +from operator import itemgetter from redis import Redis Redis = Redis() @@ -292,6 +293,7 @@ class DatasetGroup(object): self.incparentsf1 = False self.allsamples = None + self._datasets = None def get_specified_markers(self, markers = []): self.markers = HumanMarkers(self.name, markers) @@ -305,6 +307,56 @@ class DatasetGroup(object): self.markers = marker_class(self.name) + def datasets(self): + key = "group_dataset_menu:v1:" + self.name + print("key is:", key) + with Bench("Loading cache"): + result = Redis.get(key) + if result: + self._datasets = pickle.loads(result) + return self._datasets + + dataset_menu = [] + print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH) + print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH)) + results = g.db.execute(''' + (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name + FROM PublishFreeze,InbredSet + WHERE PublishFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name = %s + and PublishFreeze.public > %s) + UNION + (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name + FROM GenoFreeze, InbredSet + WHERE GenoFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name = %s + and GenoFreeze.public > %s) + UNION + (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name + FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id + and ProbeFreeze.TissueId = Tissue.Id + and ProbeFreeze.InbredSetId = InbredSet.Id + and InbredSet.Name like %s + and ProbeSetFreeze.public > %s + ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId) + ''', (self.name, webqtlConfig.PUBLICTHRESH, + self.name, webqtlConfig.PUBLICTHRESH, + "%" + self.name + "%", webqtlConfig.PUBLICTHRESH)) + + for tissue_name, dataset in itertools.groupby(results.fetchall(), itemgetter(0)): + if tissue_name in ['#PublishFreeze', '#GenoFreeze']: + for item in dataset: + dataset_menu.append(dict(tissue=None, datasets=[item[1:]])) + else: + dataset_sub_menu = [item[1:] for item in dataset] + dataset_menu.append(dict(tissue=tissue_name, + datasets=dataset_sub_menu)) + + Redis.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL)) + Redis.expire(key, 60*5) + self._datasets = dataset_menu + return self._datasets def get_f1_parent_strains(self): try: @@ -319,7 +371,7 @@ class DatasetGroup(object): self.parlist = [maternal, paternal] def get_samplelist(self): - key = "samplelist:v4:" + self.name + key = "samplelist:v2:" + self.name print("key is:", key) with Bench("Loading cache"): result = Redis.get(key) @@ -332,14 +384,29 @@ class DatasetGroup(object): print(" self.samplelist: ", self.samplelist) else: print("Cache not hit") - try: - self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno") - except IOError: + + from utility.tools import plink_command + PLINK_PATH,PLINK_COMMAND = plink_command() + + geno_file_path = webqtlConfig.GENODIR+self.name+".geno" + plink_file_path = PLINK_PATH+"/"+self.name+".fam" + + if os.path.isfile(plink_file_path): + self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) + elif os.path.isfile(geno_file_path): + self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) + else: self.samplelist = None print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) + def all_samples_ordered(self): + result = [] + lists = (self.parlist, self.f1list, self.samplelist) + [result.extend(l) for l in lists if l] + return result + def read_genotype_file(self): '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': @@ -633,7 +700,7 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Description', 'Authors', @@ -737,7 +804,7 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb)) + this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): query = """ @@ -753,11 +820,11 @@ class PhenotypeDataSet(DataSet): WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND - PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id + PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id Order BY Strain.Name - """ % (trait, self.id) - results = g.db.execute(query).fetchall() + """ + results = g.db.execute(query, (trait, self.id)).fetchall() return results @@ -777,7 +844,7 @@ class GenotypeDataSet(DataSet): 'sequence'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Location'] @@ -828,7 +895,7 @@ class GenotypeDataSet(DataSet): else: trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) ) + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) ) this_trait.location_value = trait_location_value def retrieve_sample_data(self, trait): @@ -840,15 +907,17 @@ class GenotypeDataSet(DataSet): left join GenoSE on (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) WHERE - Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND + Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoFreeze.Name = '%s' AND + GenoFreeze.Name = %s AND GenoXRef.DataId = GenoData.Id AND GenoData.StrainId = Strain.Id Order BY Strain.Name - """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name) - results = g.db.execute(query).fetchall() + """ + results = g.db.execute(query, + (webqtlDatabaseFunction.retrieve_species_id(self.group.name), + trait, self.name)).fetchall() return results @@ -893,7 +962,7 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields = ['', + self.header_fields = ['Index', 'ID', 'Symbol', 'Description', @@ -1055,7 +1124,7 @@ class MrnaAssayDataSet(DataSet): # this_trait.mb) #ZS: Put this in function currently called "convert_location_to_value" - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, + this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb)) this_trait.location_value = trait_location_value @@ -1111,7 +1180,7 @@ class MrnaAssayDataSet(DataSet): this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) + this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb)) def convert_location_to_value(self, chromosome, mb): |