From 4f06f22071898171cb7bbc12f5dfd42b05dc3208 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 16 Jun 2021 21:58:00 +0000 Subject: Added function as_dict to data_set.py for situations where we don't want to pass around the whole dataset object --- wqflask/base/data_set.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 75ddf278..dc338971 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -559,6 +559,7 @@ class DataSet: self.fullname = None self.type = None self.data_scale = None # ZS: For example log2 + self.accession_id = None self.setup() @@ -575,6 +576,17 @@ class DataSet: self.group.get_samplelist() self.species = species.TheSpecies(self) + def as_dict(self): + return { + 'name': self.name, + 'shortname': self.shortname, + 'fullname': self.fullname, + 'type': self.type, + 'data_scale': self.data_scale, + 'group': self.group.name, + 'accession_id': self.accession_id + } + def get_accession_id(self): if self.type == "Publish": results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where -- cgit v1.2.3 From 73195c1af57cbcf0fa38f1e73e8878c02938ef4b Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 16 Jun 2021 21:58:43 +0000 Subject: Changed the function jsonable in trait.py to return a couple extra fields + take dataset as an optional argument (to avoid having to pointlessly create it) --- wqflask/base/trait.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index d09cfd40..10851e00 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -284,17 +284,19 @@ def get_sample_data(): return None -def jsonable(trait): +def jsonable(trait, dataset=None): """Return a dict suitable for using as json Actual turning into json doesn't happen here though""" - dataset = create_dataset(dataset_name=trait.dataset.name, - dataset_type=trait.dataset.type, - group_name=trait.dataset.group.name) + if not dataset: + dataset = create_dataset(dataset_name=trait.dataset.name, + dataset_type=trait.dataset.type, + group_name=trait.dataset.group.name) if dataset.type == "ProbeSet": return dict(name=trait.name, + view=trait.view, symbol=trait.symbol, dataset=dataset.name, dataset_name=dataset.shortname, @@ -308,37 +310,44 @@ def jsonable(trait): elif dataset.type == "Publish": if trait.pubmed_id: return dict(name=trait.name, + view=trait.view, dataset=dataset.name, dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, + pubmed_id=trait.pubmed_id, pubmed_text=trait.pubmed_text, pubmed_link=trait.pubmed_link, + mean=trait.mean, lrs_score=trait.LRS_score_repr, lrs_location=trait.LRS_location_repr, additive=trait.additive ) else: return dict(name=trait.name, + view=trait.view, dataset=dataset.name, dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, pubmed_text=trait.pubmed_text, + mean=trait.mean, lrs_score=trait.LRS_score_repr, lrs_location=trait.LRS_location_repr, additive=trait.additive ) elif dataset.type == "Geno": return dict(name=trait.name, + view=trait.view, dataset=dataset.name, dataset_name=dataset.shortname, location=trait.location_repr ) elif dataset.name == "Temp": return dict(name=trait.name, + view=trait.view, dataset="Temp", dataset_name="Temp") else: -- cgit v1.2.3 From db9b0a9df8879921217bac353eaa2c3440e8337a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 24 May 2021 16:35:11 +0300 Subject: add query for fetching probeset data --- wqflask/base/data_set.py | 258 +++++++++++++-------- wqflask/wqflask/correlation/correlation_gn3_api.py | 144 +++++++++++- 2 files changed, 303 insertions(+), 99 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index dc338971..7531ac81 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -166,7 +166,6 @@ class DatasetType: if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") - results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] @@ -278,7 +277,7 @@ class Markers: filtered_markers = [] for marker in self.markers: if marker['name'] in p_values: - #logger.debug("marker {} IS in p_values".format(i)) + # logger.debug("marker {} IS in p_values".format(i)) marker['p_value'] = p_values[marker['name']] if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): marker['lod_score'] = 0 @@ -299,7 +298,7 @@ class HumanMarkers(Markers): self.markers = [] for line in marker_data_fh: splat = line.strip().split() - #logger.debug("splat:", splat) + # logger.debug("splat:", splat) if len(specified_markers) > 0: if splat[1] in specified_markers: marker = {} @@ -441,7 +440,7 @@ class DatasetGroup: # genotype_1 is Dataset Object without parents and f1 # genotype_2 is Dataset Object with parents and f1 (not for intercross) - #genotype_1 = reaper.Dataset() + # genotype_1 = reaper.Dataset() # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: @@ -662,9 +661,39 @@ class DataSet: - def get_trait_data(self, sample_list=None): + + def chunk_dataset(self, dataset, n): + + + results = {} + + query = """ + SELECT ProbeSetXRef.DataId,ProbeSet.Name + FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze + WHERE ProbeSetFreeze.Name = '{}' AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetXRef.ProbeSetId = ProbeSet.Id + """.format(self.name) + + # should cache this + + traits_name_dict= dict(g.db.execute(query).fetchall()) + + + + + for i in range(0, len(dataset), n): + matrix = list(dataset[i:i + n]) + trait_name = traits_name_dict[matrix[0][0]] + + my_values = [value for (trait_name, strain, value) in matrix] + results[trait_name] = my_values + return results + + def get_probeset_data(self, sample_list=None, trait_ids=None): if sample_list: self.samplelist = sample_list + else: self.samplelist = self.group.samplelist @@ -678,27 +707,59 @@ class DataSet: and Strain.SpeciesId=Species.Id and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) - logger.sql(query) results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] + query = """SELECT * from ProbeSetData + where StrainID in {} + and id in (SELECT ProbeSetXRef.DataId + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + and ProbeSetFreeze.Name = '{}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids),self.name) + + query_results=list(g.db.execute(query).fetchall()) + + data_results=self.chunk_dataset(query_results, len(sample_ids)) + self.trait_data=data_results + + def get_trait_data(self, sample_list=None): + if sample_list: + self.samplelist=sample_list + else: + self.samplelist=self.group.samplelist + + if self.group.parlist != None and self.group.f1list != None: + if (self.group.parlist + self.group.f1list) in self.samplelist: + self.samplelist += self.group.parlist + self.group.f1list + + query=""" + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) + logger.sql(query) + results=dict(g.db.execute(query).fetchall()) + sample_ids=[results[item] for item in self.samplelist] + # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks # Postgres doesn't have that limit, so we can get rid of this after we transition - chunk_size = 50 - number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) - trait_sample_data = [] + chunk_size=50 + number_chunks=int(math.ceil(len(sample_ids) / chunk_size)) + trait_sample_data=[] for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): if self.type == "Publish": - dataset_type = "Phenotype" + dataset_type="Phenotype" else: - dataset_type = self.type - temp = ['T%s.value' % item for item in sample_ids_step] + dataset_type=self.type + temp=['T%s.value' % item for item in sample_ids_step] if self.type == "Publish": - query = "SELECT {}XRef.Id,".format(escape(self.type)) + query="SELECT {}XRef.Id,".format(escape(self.type)) else: - query = "SELECT {}.Name,".format(escape(dataset_type)) - data_start_pos = 1 + query="SELECT {}.Name,".format(escape(dataset_type)) + data_start_pos=1 query += ', '.join(temp) query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, self.type, @@ -727,27 +788,27 @@ class DataSet: """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) - results = g.db.execute(query).fetchall() + results=g.db.execute(query).fetchall() trait_sample_data.append(results) - trait_count = len(trait_sample_data[0]) - self.trait_data = collections.defaultdict(list) + trait_count=len(trait_sample_data[0]) + self.trait_data=collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values for trait_counter in range(trait_count): - trait_name = trait_sample_data[0][trait_counter][0] + trait_name=trait_sample_data[0][trait_counter][0] for chunk_counter in range(int(number_chunks)): self.trait_data[trait_name] += ( trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) class PhenotypeDataSet(DataSet): - DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' + DS_NAME_MAP['Publish']='PhenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields = ['Phenotype.Post_publication_description', + self.search_fields=['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', @@ -760,7 +821,7 @@ class PhenotypeDataSet(DataSet): 'PublishXRef.Id'] # Figure out what display_fields is - self.display_fields = ['name', 'group_code', + self.display_fields=['name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', @@ -778,7 +839,7 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields = ['Index', + self.header_fields=['Index', 'Record', 'Description', 'Authors', @@ -787,9 +848,9 @@ class PhenotypeDataSet(DataSet): 'Max LRS Location', 'Additive Effect'] - self.type = 'Publish' + self.type='Publish' - self.query_for_group = ''' + self.query_for_group=''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -809,13 +870,13 @@ class PhenotypeDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieve_info(get_qtl_info=True) - description = this_trait.post_publication_description + description=this_trait.post_publication_description # If the dataset is confidential and the user has access to confidential # phenotype traits, then display the pre-publication description instead # of the post-publication description if this_trait.confidential: - this_trait.description_display = "" + this_trait.description_display="" continue # for now, because no authorization features if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( @@ -823,46 +884,46 @@ class PhenotypeDataSet(DataSet): userName=self.userName, authorized_users=this_trait.authorized_users): - description = this_trait.pre_publication_description + description=this_trait.pre_publication_description if len(description) > 0: - this_trait.description_display = description.strip() + this_trait.description_display=description.strip() else: - this_trait.description_display = "" + this_trait.description_display="" if not this_trait.year.isdigit(): - this_trait.pubmed_text = "N/A" + this_trait.pubmed_text="N/A" else: - this_trait.pubmed_text = this_trait.year + this_trait.pubmed_text=this_trait.year if this_trait.pubmed_id: - this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id + this_trait.pubmed_link=webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id # LRS and its location - this_trait.LRS_score_repr = "N/A" - this_trait.LRS_location_repr = "N/A" + this_trait.LRS_score_repr="N/A" + this_trait.LRS_location_repr="N/A" if this_trait.lrs: - query = """ + query=""" select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '%s' and Geno.Name = '%s' and Geno.SpeciesId = Species.Id """ % (species, this_trait.locus) logger.sql(query) - result = g.db.execute(query).fetchone() + result=g.db.execute(query).fetchone() if result: if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] + LRS_Chr=result[0] + LRS_Mb=result[1] - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( + this_trait.LRS_score_repr=LRS_score_repr='%3.1f' % this_trait.lrs + this_trait.LRS_location_repr=LRS_location_repr='Chr%s: %.6f' % ( LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): - query = """ + query=""" SELECT Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2 FROM @@ -880,34 +941,34 @@ class PhenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results = g.db.execute(query, (trait, self.id)).fetchall() + results=g.db.execute(query, (trait, self.id)).fetchall() return results class GenotypeDataSet(DataSet): - DS_NAME_MAP['Geno'] = 'GenotypeDataSet' + DS_NAME_MAP['Geno']='GenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields = ['Name', + self.search_fields=['Name', 'Chr'] # Find out what display_fields is - self.display_fields = ['name', + self.display_fields=['name', 'chr', 'mb', 'source2', 'sequence'] # Fields displayed in the search results table header - self.header_fields = ['Index', + self.header_fields=['Index', 'ID', 'Location'] # Todo: Obsolete or rename this field - self.type = 'Geno' + self.type='Geno' - self.query_for_group = ''' + self.query_for_group=''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -926,11 +987,11 @@ class GenotypeDataSet(DataSet): this_trait.retrieveInfo() if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( + this_trait.location_repr='Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) def retrieve_sample_data(self, trait): - query = """ + query=""" SELECT Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 FROM @@ -947,7 +1008,7 @@ class GenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results = g.db.execute(query, + results=g.db.execute(query, (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)).fetchall() return results @@ -961,11 +1022,11 @@ class MrnaAssayDataSet(DataSet): platform and is far too specific. ''' - DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet' + DS_NAME_MAP['ProbeSet']='MrnaAssayDataSet' def setup(self): # Fields in the database table - self.search_fields = ['Name', + self.search_fields=['Name', 'Description', 'Probe_Target_Description', 'Symbol', @@ -975,7 +1036,7 @@ class MrnaAssayDataSet(DataSet): 'RefSeq_TranscriptId'] # Find out what display_fields is - self.display_fields = ['name', 'symbol', + self.display_fields=['name', 'symbol', 'description', 'probe_target_description', 'chr', 'mb', 'alias', 'geneid', @@ -995,7 +1056,7 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields = ['Index', + self.header_fields=['Index', 'Record', 'Symbol', 'Description', @@ -1006,9 +1067,9 @@ class MrnaAssayDataSet(DataSet): 'Additive Effect'] # Todo: Obsolete or rename this field - self.type = 'ProbeSet' + self.type='ProbeSet' - self.query_for_group = ''' + self.query_for_group=''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -1026,7 +1087,7 @@ class MrnaAssayDataSet(DataSet): # Note: setting trait_list to [] is probably not a great idea. if not trait_list: - trait_list = [] + trait_list=[] for this_trait in trait_list: @@ -1034,33 +1095,33 @@ class MrnaAssayDataSet(DataSet): this_trait.retrieveInfo(QTL=1) if not this_trait.symbol: - this_trait.symbol = "N/A" + this_trait.symbol="N/A" # XZ, 12/08/2008: description # XZ, 06/05/2009: Rob asked to add probe target description - description_string = str( + description_string=str( str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = str( + target_string=str( str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': - description_display = description_string + description_display=description_string else: - description_display = this_trait.symbol + description_display=this_trait.symbol if (len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None'): - description_display = description_display + '; ' + target_string.strip() + description_display=description_display + '; ' + target_string.strip() # Save it for the jinja2 template - this_trait.description_display = description_display + this_trait.description_display=description_display if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( + this_trait.location_repr='Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) # Get mean expression value - query = ( + query=( """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and @@ -1068,44 +1129,45 @@ class MrnaAssayDataSet(DataSet): """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - #logger.debug("query is:", pf(query)) + # logger.debug("query is:", pf(query)) logger.sql(query) - result = g.db.execute(query).fetchone() + result=g.db.execute(query).fetchone() - mean = result[0] if result else 0 + mean=result[0] if result else 0 if mean: - this_trait.mean = "%2.3f" % mean + this_trait.mean="%2.3f" % mean # LRS and its location - this_trait.LRS_score_repr = 'N/A' - this_trait.LRS_location_repr = 'N/A' + this_trait.LRS_score_repr='N/A' + this_trait.LRS_location_repr='N/A' # Max LRS and its Locus location if this_trait.lrs and this_trait.locus: - query = """ + query=""" select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) logger.sql(query) - result = g.db.execute(query).fetchone() + result=g.db.execute(query).fetchone() if result: - lrs_chr, lrs_mb = result - this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( + lrs_chr, lrs_mb=result + this_trait.LRS_score_repr='%3.1f' % this_trait.lrs + this_trait.LRS_location_repr='Chr%s: %.6f' % ( lrs_chr, float(lrs_mb)) return trait_list def retrieve_sample_data(self, trait): - query = """ + query=""" SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 FROM - (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + (ProbeSetData, ProbeSetFreeze, + Strain, ProbeSet, ProbeSetXRef) left join ProbeSetSE on (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) left join NStrain on @@ -1121,19 +1183,19 @@ class MrnaAssayDataSet(DataSet): Strain.Name """ % (escape(trait), escape(self.name)) logger.sql(query) - results = g.db.execute(query).fetchall() - #logger.debug("RETRIEVED RESULTS HERE:", results) + results=g.db.execute(query).fetchall() + # logger.debug("RETRIEVED RESULTS HERE:", results) return results def retrieve_genes(self, column_name): - query = """ + query=""" select ProbeSet.Name, ProbeSet.%s from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) logger.sql(query) - results = g.db.execute(query).fetchall() + results=g.db.execute(query).fetchall() return dict(results) @@ -1141,40 +1203,40 @@ class MrnaAssayDataSet(DataSet): class TempDataSet(DataSet): '''Temporary user-generated data set''' - DS_NAME_MAP['Temp'] = 'TempDataSet' + DS_NAME_MAP['Temp']='TempDataSet' def setup(self): - self.search_fields = ['name', + self.search_fields=['name', 'description'] - self.display_fields = ['name', + self.display_fields=['name', 'description'] - self.header_fields = ['Name', + self.header_fields=['Name', 'Description'] - self.type = 'Temp' + self.type='Temp' # Need to double check later how these are used - self.id = 1 - self.fullname = 'Temporary Storage' - self.shortname = 'Temp' + self.id=1 + self.fullname='Temporary Storage' + self.shortname='Temp' def geno_mrna_confidentiality(ob): - dataset_table = ob.type + "Freeze" - #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) + dataset_table=ob.type + "Freeze" + # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) - query = '''SELECT Id, Name, FullName, confidentiality, + query='''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) logger.sql(query) - result = g.db.execute(query) + result=g.db.execute(query) (dataset_id, name, full_name, confidential, - authorized_users) = result.fetchall()[0] + authorized_users)=result.fetchall()[0] if confidential: return True diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index df0afba6..9fbfee48 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -27,6 +27,34 @@ def create_target_this_trait(start_vars): return (this_dataset, this_trait, target_dataset, sample_data) + +def test_process_data(this_trait,dataset,start_vars): + """test function for bxd,all and other sample data""" + + corr_samples_group = start_vars["corr_samples_group"] + + + primary_samples = dataset.group.samplelist + if dataset.group.parlist != None: + primary_samples += dataset.group.parlist + if dataset.group.f1list != None: + primary_samples += dataset.group.f1list + + # If either BXD/whatever Only or All Samples, append all of that group's samplelist + if corr_samples_group != 'samples_other': + sample_data = process_samples(start_vars, primary_samples) + + # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + # exclude the primary samples (because they would have been added in the previous + # if statement if the user selected All Samples) + if corr_samples_group != 'samples_primary': + if corr_samples_group == 'samples_other': + primary_samples = [x for x in primary_samples if x not in ( + dataset.group.parlist + dataset.group.f1list)] + sample_data = process_samples(start_vars, list(this_trait.data.keys()), primary_samples) + + return sample_data + def process_samples(start_vars, sample_names, excluded_samples=None): """process samples""" sample_data = {} @@ -118,13 +146,22 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): sample_data = process_samples( start_vars, this_dataset.group.samplelist) - target_dataset.get_trait_data(list(sample_data.keys())) + + # sample_data = test_process_data(this_trait,this_dataset,start_vars) + + if target_dataset.type =="ProbeSet": + # pass + target_dataset.get_probeset_data(list(sample_data.keys())) + else: + target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) this_trait_data = { "trait_sample_data": sample_data, "trait_id": start_vars["trait_id"] } + # should remove this len(samplelist) == len(strain_values) + results = map_shared_keys_to_values( target_dataset.samplelist, target_dataset.trait_data) @@ -205,6 +242,7 @@ def compute_correlation(start_vars, method="pearson", compute_all=False): "target_dataset": start_vars['corr_dataset'], "return_results": corr_return_results} + return correlation_data @@ -265,3 +303,107 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): } return (primary_tissue_data, target_tissue_data) return None + + +def generate_corr_data(corr_results, target_dataset): + counter = 0 + results_list = [] + for (index, trait_corr) in enumerate(corr_results): + trait_name = list(trait_corr.keys())[0] + trait = create_trait(dataset=target_dataset, + name=trait_name) + + trait_corr_data = trait_corr[trait_name] + + if trait.view == False: + continue + results_dict = {} + results_dict['index'] = index + 1 + results_dict['trait_id'] = trait.name + results_dict['dataset'] = trait.dataset.name + # results_dict['hmac'] = hmac.data_hmac( + # '{}:{}'.format(trait.name, trait.dataset.name)) + if target_dataset.type == "ProbeSet": + results_dict['symbol'] = trait.symbol + results_dict['description'] = "N/A" + results_dict['location'] = trait.location_repr + results_dict['mean'] = "N/A" + results_dict['additive'] = "N/A" + if bool(trait.description_display): + results_dict['description'] = trait.description_display + if bool(trait.mean): + results_dict['mean'] = f"{float(trait.mean):.3f}" + try: + results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" + except: + results_dict['lod_score'] = "N/A" + results_dict['lrs_location'] = trait.LRS_location_repr + if bool(trait.additive): + results_dict['additive'] = f"{float(trait.additive):.3f}" + results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" + results_dict['num_overlap'] = trait.num_overlap + results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" + results_dict['lit_corr'] = "--" + results_dict['tissue_corr'] = "--" + results_dict['tissue_pvalue'] = "--" + tissue_corr = trait_corr_data.get('tissue_corr',0) + lit_corr = trait_corr_data.get('lit_corr',0) + if bool(lit_corr): + results_dict['lit_corr'] = f"{float(trait_corr_data.get('lit_corr',0)):.3f}" + if bool(tissue_corr): + results_dict['tissue_corr'] = f"{float(trait_corr_data.get('tissue_corr',0)):.3f}" + results_dict['tissue_pvalue'] = f"{float(trait_corr_data.get('tissue_pvalue',0)):.3e}" + elif target_dataset.type == "Publish": + results_dict['abbreviation_display'] = "N/A" + results_dict['description'] = "N/A" + results_dict['mean'] = "N/A" + results_dict['authors_display'] = "N/A" + results_dict['additive'] = "N/A" + if for_api: + results_dict['pubmed_id'] = "N/A" + results_dict['year'] = "N/A" + else: + results_dict['pubmed_link'] = "N/A" + results_dict['pubmed_text'] = "N/A" + + if bool(trait.abbreviation): + results_dict['abbreviation_display'] = trait.abbreviation + if bool(trait.description_display): + results_dict['description'] = trait.description_display + if bool(trait.mean): + results_dict['mean'] = f"{float(trait.mean):.3f}" + if bool(trait.authors): + authors_list = trait.authors.split(',') + if len(authors_list) > 6: + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." + else: + results_dict['authors_display'] = trait.authors + if bool(trait.pubmed_id): + if for_api: + results_dict['pubmed_id'] = trait.pubmed_id + results_dict['year'] = trait.pubmed_text + else: + results_dict['pubmed_link'] = trait.pubmed_link + results_dict['pubmed_text'] = trait.pubmed_text + try: + results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" + except: + results_dict['lod_score'] = "N/A" + results_dict['lrs_location'] = trait.LRS_location_repr + if bool(trait.additive): + results_dict['additive'] = f"{float(trait.additive):.3f}" + results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" + results_dict['num_overlap'] = trait.num_overlap + results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" + else: + results_dict['location'] = trait.location_repr + results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" + results_dict['num_overlap'] = trait.num_overlap + results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" + + results_list.append(results_dict) + + return results_list + + -- cgit v1.2.3 From c9a133e75d32e0bb9c8e63443b0f43f084363996 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 24 May 2021 16:37:36 +0300 Subject: pep8 formatting --- wqflask/base/data_set.py | 214 ++++++++++++++++++++++++----------------------- 1 file changed, 109 insertions(+), 105 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 7531ac81..39296f6a 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -659,12 +659,8 @@ class DataSet: "Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - - - def chunk_dataset(self, dataset, n): - results = {} query = """ @@ -677,10 +673,7 @@ class DataSet: # should cache this - traits_name_dict= dict(g.db.execute(query).fetchall()) - - - + traits_name_dict = dict(g.db.execute(query).fetchall()) for i in range(0, len(dataset), n): matrix = list(dataset[i:i + n]) @@ -716,50 +709,50 @@ class DataSet: FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids),self.name) + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name) - query_results=list(g.db.execute(query).fetchall()) + query_results = list(g.db.execute(query).fetchall()) - data_results=self.chunk_dataset(query_results, len(sample_ids)) - self.trait_data=data_results + data_results = self.chunk_dataset(query_results, len(sample_ids)) + self.trait_data = data_results def get_trait_data(self, sample_list=None): if sample_list: - self.samplelist=sample_list + self.samplelist = sample_list else: - self.samplelist=self.group.samplelist + self.samplelist = self.group.samplelist if self.group.parlist != None and self.group.f1list != None: if (self.group.parlist + self.group.f1list) in self.samplelist: self.samplelist += self.group.parlist + self.group.f1list - query=""" + query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} and Strain.SpeciesId=Species.Id and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) logger.sql(query) - results=dict(g.db.execute(query).fetchall()) - sample_ids=[results[item] for item in self.samplelist] + results = dict(g.db.execute(query).fetchall()) + sample_ids = [results[item] for item in self.samplelist] # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks # Postgres doesn't have that limit, so we can get rid of this after we transition - chunk_size=50 - number_chunks=int(math.ceil(len(sample_ids) / chunk_size)) - trait_sample_data=[] + chunk_size = 50 + number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) + trait_sample_data = [] for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): if self.type == "Publish": - dataset_type="Phenotype" + dataset_type = "Phenotype" else: - dataset_type=self.type - temp=['T%s.value' % item for item in sample_ids_step] + dataset_type = self.type + temp = ['T%s.value' % item for item in sample_ids_step] if self.type == "Publish": - query="SELECT {}XRef.Id,".format(escape(self.type)) + query = "SELECT {}XRef.Id,".format(escape(self.type)) else: - query="SELECT {}.Name,".format(escape(dataset_type)) - data_start_pos=1 + query = "SELECT {}.Name,".format(escape(dataset_type)) + data_start_pos = 1 query += ', '.join(temp) query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, self.type, @@ -788,27 +781,27 @@ class DataSet: """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) - results=g.db.execute(query).fetchall() + results = g.db.execute(query).fetchall() trait_sample_data.append(results) - trait_count=len(trait_sample_data[0]) - self.trait_data=collections.defaultdict(list) + trait_count = len(trait_sample_data[0]) + self.trait_data = collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values for trait_counter in range(trait_count): - trait_name=trait_sample_data[0][trait_counter][0] + trait_name = trait_sample_data[0][trait_counter][0] for chunk_counter in range(int(number_chunks)): self.trait_data[trait_name] += ( trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) class PhenotypeDataSet(DataSet): - DS_NAME_MAP['Publish']='PhenotypeDataSet' + DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields=['Phenotype.Post_publication_description', + self.search_fields = ['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', @@ -821,7 +814,7 @@ class PhenotypeDataSet(DataSet): 'PublishXRef.Id'] # Figure out what display_fields is - self.display_fields=['name', 'group_code', + self.display_fields = ['name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', @@ -839,7 +832,7 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields=['Index', + self.header_fields = ['Index', 'Record', 'Description', 'Authors', @@ -848,9 +841,9 @@ class PhenotypeDataSet(DataSet): 'Max LRS Location', 'Additive Effect'] - self.type='Publish' + self.type = 'Publish' - self.query_for_group=''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -870,13 +863,13 @@ class PhenotypeDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieve_info(get_qtl_info=True) - description=this_trait.post_publication_description + description = this_trait.post_publication_description # If the dataset is confidential and the user has access to confidential # phenotype traits, then display the pre-publication description instead # of the post-publication description if this_trait.confidential: - this_trait.description_display="" + this_trait.description_display = "" continue # for now, because no authorization features if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( @@ -884,46 +877,46 @@ class PhenotypeDataSet(DataSet): userName=self.userName, authorized_users=this_trait.authorized_users): - description=this_trait.pre_publication_description + description = this_trait.pre_publication_description if len(description) > 0: - this_trait.description_display=description.strip() + this_trait.description_display = description.strip() else: - this_trait.description_display="" + this_trait.description_display = "" if not this_trait.year.isdigit(): - this_trait.pubmed_text="N/A" + this_trait.pubmed_text = "N/A" else: - this_trait.pubmed_text=this_trait.year + this_trait.pubmed_text = this_trait.year if this_trait.pubmed_id: - this_trait.pubmed_link=webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id + this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id # LRS and its location - this_trait.LRS_score_repr="N/A" - this_trait.LRS_location_repr="N/A" + this_trait.LRS_score_repr = "N/A" + this_trait.LRS_location_repr = "N/A" if this_trait.lrs: - query=""" + query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '%s' and Geno.Name = '%s' and Geno.SpeciesId = Species.Id """ % (species, this_trait.locus) logger.sql(query) - result=g.db.execute(query).fetchone() + result = g.db.execute(query).fetchone() if result: if result[0] and result[1]: - LRS_Chr=result[0] - LRS_Mb=result[1] + LRS_Chr = result[0] + LRS_Mb = result[1] - this_trait.LRS_score_repr=LRS_score_repr='%3.1f' % this_trait.lrs - this_trait.LRS_location_repr=LRS_location_repr='Chr%s: %.6f' % ( + this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): - query=""" + query = """ SELECT Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2 FROM @@ -941,34 +934,34 @@ class PhenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results=g.db.execute(query, (trait, self.id)).fetchall() + results = g.db.execute(query, (trait, self.id)).fetchall() return results class GenotypeDataSet(DataSet): - DS_NAME_MAP['Geno']='GenotypeDataSet' + DS_NAME_MAP['Geno'] = 'GenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields=['Name', + self.search_fields = ['Name', 'Chr'] # Find out what display_fields is - self.display_fields=['name', + self.display_fields = ['name', 'chr', 'mb', 'source2', 'sequence'] # Fields displayed in the search results table header - self.header_fields=['Index', + self.header_fields = ['Index', 'ID', 'Location'] # Todo: Obsolete or rename this field - self.type='Geno' + self.type = 'Geno' - self.query_for_group=''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -987,11 +980,11 @@ class GenotypeDataSet(DataSet): this_trait.retrieveInfo() if this_trait.chr and this_trait.mb: - this_trait.location_repr='Chr%s: %.6f' % ( + this_trait.location_repr = 'Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) def retrieve_sample_data(self, trait): - query=""" + query = """ SELECT Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 FROM @@ -1008,7 +1001,7 @@ class GenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results=g.db.execute(query, + results = g.db.execute(query, (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)).fetchall() return results @@ -1022,11 +1015,11 @@ class MrnaAssayDataSet(DataSet): platform and is far too specific. ''' - DS_NAME_MAP['ProbeSet']='MrnaAssayDataSet' + DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet' def setup(self): # Fields in the database table - self.search_fields=['Name', + self.search_fields = ['Name', 'Description', 'Probe_Target_Description', 'Symbol', @@ -1036,7 +1029,7 @@ class MrnaAssayDataSet(DataSet): 'RefSeq_TranscriptId'] # Find out what display_fields is - self.display_fields=['name', 'symbol', + self.display_fields = ['name', 'symbol', 'description', 'probe_target_description', 'chr', 'mb', 'alias', 'geneid', @@ -1056,7 +1049,7 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields=['Index', + self.header_fields = ['Index', 'Record', 'Symbol', 'Description', @@ -1067,9 +1060,9 @@ class MrnaAssayDataSet(DataSet): 'Additive Effect'] # Todo: Obsolete or rename this field - self.type='ProbeSet' + self.type = 'ProbeSet' - self.query_for_group=''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -1087,7 +1080,7 @@ class MrnaAssayDataSet(DataSet): # Note: setting trait_list to [] is probably not a great idea. if not trait_list: - trait_list=[] + trait_list = [] for this_trait in trait_list: @@ -1095,33 +1088,33 @@ class MrnaAssayDataSet(DataSet): this_trait.retrieveInfo(QTL=1) if not this_trait.symbol: - this_trait.symbol="N/A" + this_trait.symbol = "N/A" # XZ, 12/08/2008: description # XZ, 06/05/2009: Rob asked to add probe target description - description_string=str( + description_string = str( str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string=str( + target_string = str( str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': - description_display=description_string + description_display = description_string else: - description_display=this_trait.symbol + description_display = this_trait.symbol if (len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None'): - description_display=description_display + '; ' + target_string.strip() + description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template - this_trait.description_display=description_display + this_trait.description_display = description_display if this_trait.chr and this_trait.mb: - this_trait.location_repr='Chr%s: %.6f' % ( + this_trait.location_repr = 'Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) # Get mean expression value - query=( + query = ( """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and @@ -1131,38 +1124,38 @@ class MrnaAssayDataSet(DataSet): # logger.debug("query is:", pf(query)) logger.sql(query) - result=g.db.execute(query).fetchone() + result = g.db.execute(query).fetchone() - mean=result[0] if result else 0 + mean = result[0] if result else 0 if mean: - this_trait.mean="%2.3f" % mean + this_trait.mean = "%2.3f" % mean # LRS and its location - this_trait.LRS_score_repr='N/A' - this_trait.LRS_location_repr='N/A' + this_trait.LRS_score_repr = 'N/A' + this_trait.LRS_location_repr = 'N/A' # Max LRS and its Locus location if this_trait.lrs and this_trait.locus: - query=""" + query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) logger.sql(query) - result=g.db.execute(query).fetchone() + result = g.db.execute(query).fetchone() if result: - lrs_chr, lrs_mb=result - this_trait.LRS_score_repr='%3.1f' % this_trait.lrs - this_trait.LRS_location_repr='Chr%s: %.6f' % ( + lrs_chr, lrs_mb = result + this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( lrs_chr, float(lrs_mb)) return trait_list def retrieve_sample_data(self, trait): - query=""" + query = """ SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 FROM @@ -1183,19 +1176,19 @@ class MrnaAssayDataSet(DataSet): Strain.Name """ % (escape(trait), escape(self.name)) logger.sql(query) - results=g.db.execute(query).fetchall() + results = g.db.execute(query).fetchall() # logger.debug("RETRIEVED RESULTS HERE:", results) return results def retrieve_genes(self, column_name): - query=""" + query = """ select ProbeSet.Name, ProbeSet.%s from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) logger.sql(query) - results=g.db.execute(query).fetchall() + results = g.db.execute(query).fetchall() return dict(results) @@ -1203,40 +1196,51 @@ class MrnaAssayDataSet(DataSet): class TempDataSet(DataSet): '''Temporary user-generated data set''' - DS_NAME_MAP['Temp']='TempDataSet' + DS_NAME_MAP['Temp'] = 'TempDataSet' def setup(self): - self.search_fields=['name', + self.search_fields = ['name', 'description'] - self.display_fields=['name', + self.display_fields = ['name', 'description'] - self.header_fields=['Name', + self.header_fields = ['Name', 'Description'] - self.type='Temp' + self.type = 'Temp' # Need to double check later how these are used - self.id=1 - self.fullname='Temporary Storage' - self.shortname='Temp' + self.id = 1 + self.fullname = 'Temporary Storage' + self.shortname = 'Temp' def geno_mrna_confidentiality(ob): - dataset_table=ob.type + "Freeze" + dataset_table = ob.type + "Freeze" # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) - query='''SELECT Id, Name, FullName, confidentiality, + query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) logger.sql(query) - result=g.db.execute(query) + result = g.db.execute(query) + + (dataset_id, + name, + full_name, + confidential, + authorized_users) = result.fetchall()[0] + + if confidential: + return True +uery) + result = g.db.execute(query) (dataset_id, name, full_name, confidential, - authorized_users)=result.fetchall()[0] + authorized_users) = result.fetchall()[0] if confidential: return True -- cgit v1.2.3 From d8d3a8311106ea97c1fba4ef4606929eb9b0fdcb Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 24 May 2021 16:43:45 +0300 Subject: minor fix --- wqflask/base/data_set.py | 1 - 1 file changed, 1 deletion(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 39296f6a..4a150e86 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1233,7 +1233,6 @@ def geno_mrna_confidentiality(ob): if confidential: return True -uery) result = g.db.execute(query) (dataset_id, -- cgit v1.2.3 From aaacfe3a0abc7ca4fe5bdb486651e018cdc7aba0 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 07:25:03 +0300 Subject: remove unused functions + minor fixes --- wqflask/base/data_set.py | 2 + wqflask/wqflask/correlation/correlation_gn3_api.py | 115 +-------------------- 2 files changed, 7 insertions(+), 110 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 4a150e86..4d54cfae 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -684,6 +684,8 @@ class DataSet: return results def get_probeset_data(self, sample_list=None, trait_ids=None): + + # improvement of get trait data--->>> if sample_list: self.samplelist = sample_list diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 9fbfee48..5fa33027 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -27,13 +27,11 @@ def create_target_this_trait(start_vars): return (this_dataset, this_trait, target_dataset, sample_data) - -def test_process_data(this_trait,dataset,start_vars): +def test_process_data(this_trait, dataset, start_vars): """test function for bxd,all and other sample data""" corr_samples_group = start_vars["corr_samples_group"] - primary_samples = dataset.group.samplelist if dataset.group.parlist != None: primary_samples += dataset.group.parlist @@ -51,10 +49,12 @@ def test_process_data(this_trait,dataset,start_vars): if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( dataset.group.parlist + dataset.group.f1list)] - sample_data = process_samples(start_vars, list(this_trait.data.keys()), primary_samples) + sample_data = process_samples(start_vars, list( + this_trait.data.keys()), primary_samples) return sample_data + def process_samples(start_vars, sample_names, excluded_samples=None): """process samples""" sample_data = {} @@ -149,7 +149,7 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): # sample_data = test_process_data(this_trait,this_dataset,start_vars) - if target_dataset.type =="ProbeSet": + if target_dataset.type == "ProbeSet": # pass target_dataset.get_probeset_data(list(sample_data.keys())) else: @@ -242,7 +242,6 @@ def compute_correlation(start_vars, method="pearson", compute_all=False): "target_dataset": start_vars['corr_dataset'], "return_results": corr_return_results} - return correlation_data @@ -303,107 +302,3 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): } return (primary_tissue_data, target_tissue_data) return None - - -def generate_corr_data(corr_results, target_dataset): - counter = 0 - results_list = [] - for (index, trait_corr) in enumerate(corr_results): - trait_name = list(trait_corr.keys())[0] - trait = create_trait(dataset=target_dataset, - name=trait_name) - - trait_corr_data = trait_corr[trait_name] - - if trait.view == False: - continue - results_dict = {} - results_dict['index'] = index + 1 - results_dict['trait_id'] = trait.name - results_dict['dataset'] = trait.dataset.name - # results_dict['hmac'] = hmac.data_hmac( - # '{}:{}'.format(trait.name, trait.dataset.name)) - if target_dataset.type == "ProbeSet": - results_dict['symbol'] = trait.symbol - results_dict['description'] = "N/A" - results_dict['location'] = trait.location_repr - results_dict['mean'] = "N/A" - results_dict['additive'] = "N/A" - if bool(trait.description_display): - results_dict['description'] = trait.description_display - if bool(trait.mean): - results_dict['mean'] = f"{float(trait.mean):.3f}" - try: - results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" - except: - results_dict['lod_score'] = "N/A" - results_dict['lrs_location'] = trait.LRS_location_repr - if bool(trait.additive): - results_dict['additive'] = f"{float(trait.additive):.3f}" - results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" - results_dict['num_overlap'] = trait.num_overlap - results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" - results_dict['lit_corr'] = "--" - results_dict['tissue_corr'] = "--" - results_dict['tissue_pvalue'] = "--" - tissue_corr = trait_corr_data.get('tissue_corr',0) - lit_corr = trait_corr_data.get('lit_corr',0) - if bool(lit_corr): - results_dict['lit_corr'] = f"{float(trait_corr_data.get('lit_corr',0)):.3f}" - if bool(tissue_corr): - results_dict['tissue_corr'] = f"{float(trait_corr_data.get('tissue_corr',0)):.3f}" - results_dict['tissue_pvalue'] = f"{float(trait_corr_data.get('tissue_pvalue',0)):.3e}" - elif target_dataset.type == "Publish": - results_dict['abbreviation_display'] = "N/A" - results_dict['description'] = "N/A" - results_dict['mean'] = "N/A" - results_dict['authors_display'] = "N/A" - results_dict['additive'] = "N/A" - if for_api: - results_dict['pubmed_id'] = "N/A" - results_dict['year'] = "N/A" - else: - results_dict['pubmed_link'] = "N/A" - results_dict['pubmed_text'] = "N/A" - - if bool(trait.abbreviation): - results_dict['abbreviation_display'] = trait.abbreviation - if bool(trait.description_display): - results_dict['description'] = trait.description_display - if bool(trait.mean): - results_dict['mean'] = f"{float(trait.mean):.3f}" - if bool(trait.authors): - authors_list = trait.authors.split(',') - if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join( - authors_list[:6]) + ", et al." - else: - results_dict['authors_display'] = trait.authors - if bool(trait.pubmed_id): - if for_api: - results_dict['pubmed_id'] = trait.pubmed_id - results_dict['year'] = trait.pubmed_text - else: - results_dict['pubmed_link'] = trait.pubmed_link - results_dict['pubmed_text'] = trait.pubmed_text - try: - results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" - except: - results_dict['lod_score'] = "N/A" - results_dict['lrs_location'] = trait.LRS_location_repr - if bool(trait.additive): - results_dict['additive'] = f"{float(trait.additive):.3f}" - results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" - results_dict['num_overlap'] = trait.num_overlap - results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" - else: - results_dict['location'] = trait.location_repr - results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" - results_dict['num_overlap'] = trait.num_overlap - results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" - - results_list.append(results_dict) - - return results_list - - -- cgit v1.2.3 From 48e66dc230292a9c0b66da946106d8c9f611074f Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 00:54:02 +0300 Subject: sort sample name by sample_ids --- wqflask/base/data_set.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 4d54cfae..991c9fee 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -705,6 +705,9 @@ class DataSet: results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] + sorted_samplelist = [strain_name for strain_name, strain_id in sorted( + results.items(), key=lambda item: item[1])] + query = """SELECT * from ProbeSetData where StrainID in {} and id in (SELECT ProbeSetXRef.DataId @@ -714,9 +717,10 @@ class DataSet: and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name) query_results = list(g.db.execute(query).fetchall()) - data_results = self.chunk_dataset(query_results, len(sample_ids)) + self.samplelist = sorted_samplelist self.trait_data = data_results + def get_trait_data(self, sample_list=None): if sample_list: -- cgit v1.2.3 From 8521ba973ccf14b6918948a93cfde07d6cf1e27a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 09:28:46 +0300 Subject: delete loggers and comments --- wqflask/base/data_set.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 991c9fee..4cb82665 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -440,7 +440,6 @@ class DatasetGroup: # genotype_1 is Dataset Object without parents and f1 # genotype_2 is Dataset Object with parents and f1 (not for intercross) - # genotype_1 = reaper.Dataset() # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: @@ -1128,7 +1127,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - # logger.debug("query is:", pf(query)) logger.sql(query) result = g.db.execute(query).fetchone() @@ -1183,7 +1181,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(trait), escape(self.name)) logger.sql(query) results = g.db.execute(query).fetchall() - # logger.debug("RETRIEVED RESULTS HERE:", results) return results def retrieve_genes(self, column_name): @@ -1224,7 +1221,6 @@ class TempDataSet(DataSet): def geno_mrna_confidentiality(ob): dataset_table = ob.type + "Freeze" - # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) -- cgit v1.2.3 From f4fbb6d53419a19c6ee67977d18605cdcbb09c0e Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 12 Aug 2021 22:35:53 +0000 Subject: add function for reading in JSON file that lists sample lists unique to each study within a group (in this case only BXD Longevity for now) --- wqflask/base/data_set.py | 9 +++++++++ wqflask/wqflask/show_trait/show_trait.py | 1 + 2 files changed, 10 insertions(+) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 4cb82665..b8f2f9fb 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -398,6 +398,15 @@ class DatasetGroup: if maternal and paternal: self.parlist = [maternal, paternal] + def get_study_samplelists(self): + study_sample_file = "%s/study_sample_lists/%s.json" % (webqtlConfig.GENODIR, self.name) + try: + f = open(study_sample_file) + except: + return None + study_samples = json.load(f) + return study_samples + def get_genofiles(self): jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name) try: diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index c07430dd..d3356bc3 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -192,6 +192,7 @@ class ShowTrait: [self.dataset.species.chromosomes.chromosomes[this_chr].name, i]) self.genofiles = self.dataset.group.get_genofiles() + self.study_samplelists = self.dataset.group.get_study_samplelists() # ZS: No need to grab scales from .geno file unless it's using # a mapping method that reads .geno files -- cgit v1.2.3 From 98683bd5cc809aa03e0bd58a67733498b4f56a9d Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 12 Aug 2021 23:17:04 +0000 Subject: Fix the way the study_sample_lists path is set and checked --- wqflask/base/data_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index b8f2f9fb..1042e1bd 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -399,7 +399,7 @@ class DatasetGroup: self.parlist = [maternal, paternal] def get_study_samplelists(self): - study_sample_file = "%s/study_sample_lists/%s.json" % (webqtlConfig.GENODIR, self.name) + study_sample_file = locate_ignore_error(self.name + ".json", 'study_sample_lists') try: f = open(study_sample_file) except: -- cgit v1.2.3 From 810b2ace0a9cb2511cf0ef6f0c01f70a0ce11915 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 12 Aug 2021 23:19:38 +0000 Subject: Return empty list instead of None in get_study_samplelists --- wqflask/base/data_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 1042e1bd..0ea61faa 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -403,7 +403,7 @@ class DatasetGroup: try: f = open(study_sample_file) except: - return None + return [] study_samples = json.load(f) return study_samples -- cgit v1.2.3 From 579ae94b08f0b1cef00350b54dadc12869ad70de Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Fri, 6 Aug 2021 16:15:17 +0300 Subject: base: data_set: Remove unnecessary comments and logging statements --- wqflask/base/data_set.py | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 4cb82665..edc22540 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -277,7 +277,6 @@ class Markers: filtered_markers = [] for marker in self.markers: if marker['name'] in p_values: - # logger.debug("marker {} IS in p_values".format(i)) marker['p_value'] = p_values[marker['name']] if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): marker['lod_score'] = 0 @@ -298,7 +297,6 @@ class HumanMarkers(Markers): self.markers = [] for line in marker_data_fh: splat = line.strip().split() - # logger.debug("splat:", splat) if len(specified_markers) > 0: if splat[1] in specified_markers: marker = {} @@ -737,7 +735,6 @@ class DataSet: and Strain.SpeciesId=Species.Id and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) - logger.sql(query) results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] @@ -908,7 +905,6 @@ class PhenotypeDataSet(DataSet): Geno.Name = '%s' and Geno.SpeciesId = Species.Id """ % (species, this_trait.locus) - logger.sql(query) result = g.db.execute(query).fetchone() if result: @@ -938,7 +934,6 @@ class PhenotypeDataSet(DataSet): Order BY Strain.Name """ - logger.sql(query) results = g.db.execute(query, (trait, self.id)).fetchall() return results @@ -1005,7 +1000,6 @@ class GenotypeDataSet(DataSet): Order BY Strain.Name """ - logger.sql(query) results = g.db.execute(query, (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)).fetchall() @@ -1126,8 +1120,6 @@ class MrnaAssayDataSet(DataSet): ProbeSet.Name = '%s' """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - - logger.sql(query) result = g.db.execute(query).fetchone() mean = result[0] if result else 0 @@ -1147,7 +1139,6 @@ class MrnaAssayDataSet(DataSet): Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) - logger.sql(query) result = g.db.execute(query).fetchone() if result: @@ -1179,7 +1170,6 @@ class MrnaAssayDataSet(DataSet): Order BY Strain.Name """ % (escape(trait), escape(self.name)) - logger.sql(query) results = g.db.execute(query).fetchall() return results @@ -1190,7 +1180,6 @@ class MrnaAssayDataSet(DataSet): where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) - logger.sql(query) results = g.db.execute(query).fetchall() return dict(results) @@ -1224,7 +1213,6 @@ def geno_mrna_confidentiality(ob): query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) - logger.sql(query) result = g.db.execute(query) (dataset_id, -- cgit v1.2.3 From 898b80d34442c16399366a5744cc26a157a727cb Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 20 Aug 2021 17:46:20 +0000 Subject: Fixed some logic in trait.py that could cause a problem if a dataset_name was passed that wasn't Temp --- wqflask/base/trait.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 10851e00..96a09302 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -27,11 +27,13 @@ def create_trait(**kw): assert bool(kw.get('name')), "Needs trait name" - if kw.get('dataset_name'): + if bool(kw.get('dataset')): + dataset = kw.get('dataset') + else: if kw.get('dataset_name') != "Temp": dataset = create_dataset(kw.get('dataset_name')) - else: - dataset = kw.get('dataset') + else: + dataset = create_dataset("Temp", group_name=kw.get('group_name')) if dataset.type == 'Publish': permissions = check_resource_availability( -- cgit v1.2.3