diff options
-rw-r--r-- | wqflask/wqflask/show_trait/show_trait.py | 237 |
1 files changed, 139 insertions, 98 deletions
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 457eb902..e5e94c7e 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -19,20 +19,22 @@ from utility import helper_functions from utility.authentication_tools import check_owner_or_admin from utility.tools import locate_ignore_error from utility.redis_tools import get_redis_conn, get_resource_id +from utility.logger import getLogger + + Redis = get_redis_conn() ONE_YEAR = 60 * 60 * 24 * 365 - -from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) ############################################### # -# Todo: Put in security to ensure that user has permission to access confidential data sets -# And add i.p.limiting as necessary +# Todo: Put in security to ensure that user has permission to access +# confidential data sets And add i.p.limiting as necessary # ############################################## + class ShowTrait(object): def __init__(self, kw): @@ -41,13 +43,16 @@ class ShowTrait(object): self.trait_id = kw['trait_id'] helper_functions.get_species_dataset_trait(self, kw) self.resource_id = get_resource_id(self.dataset, self.trait_id) - self.admin_status = check_owner_or_admin(resource_id=self.resource_id) + self.admin_status = check_owner_or_admin( + resource_id=self.resource_id) elif 'group' in kw: self.temp_trait = True - self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + self.trait_id = "Temp_"+kw['species'] + "_" + kw['group'] + \ + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") self.temp_species = kw['species'] self.temp_group = kw['group'] - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group) # Put values in Redis so they can be looked up later if added to a collection Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR) @@ -56,25 +61,28 @@ class ShowTrait(object): name=self.trait_id, cellid=None) - self.admin_status = check_owner_or_admin(dataset=self.dataset, trait_id=self.trait_id) + self.admin_status = check_owner_or_admin( + dataset=self.dataset, trait_id=self.trait_id) else: self.temp_trait = True self.trait_id = kw['trait_id'] self.temp_species = self.trait_id.split("_")[1] self.temp_group = self.trait_id.split("_")[2] - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group) self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) self.trait_vals = Redis.get(self.trait_id).split() - self.admin_status = check_owner_or_admin(dataset=self.dataset, trait_id=self.trait_id) + self.admin_status = check_owner_or_admin( + dataset=self.dataset, trait_id=self.trait_id) - #ZS: Get verify/rna-seq link URLs + # ZS: Get verify/rna-seq link URLs try: blatsequence = self.this_trait.blatseq if not blatsequence: - #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. + # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. query1 = """SELECT Probe.Sequence, Probe.Name FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND @@ -91,10 +99,10 @@ class ShowTrait(object): if int(seqt[1][-1]) % 2 == 1: blatsequence += string.strip(seqt[0]) - #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe + # --------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A' - #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. + # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. query2 = """SELECT Probe.Sequence, Probe.Name FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND @@ -105,24 +113,29 @@ class ShowTrait(object): seqs = g.db.execute(query2).fetchall() for seqt in seqs: - if int(seqt[1][-1]) %2 == 1: - blatsequence += '%3EProbe_' + seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A' + if int(seqt[1][-1]) % 2 == 1: + blatsequence += '%3EProbe_' + \ + seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A' if self.dataset.group.species == "rat": - self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6', blatsequence) + self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ( + 'rat', 'rn6', blatsequence) self.UTHSC_BLAT_URL = "" elif self.dataset.group.species == "mouse": - self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10', blatsequence) - self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence) + self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ( + 'mouse', 'mm10', blatsequence) + self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ( + 'mouse', 'mm10', blatsequence) elif self.dataset.group.species == "human": - self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38', blatsequence) + self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ( + 'human', 'hg38', blatsequence) self.UTHSC_BLAT_URL = "" else: self.UCSC_BLAT_URL = "" self.UTHSC_BLAT_URL = "" except: - self.UCSC_BLAT_URL = "" - self.UTHSC_BLAT_URL = "" + self.UCSC_BLAT_URL = "" + self.UTHSC_BLAT_URL = "" if self.dataset.type == "ProbeSet": self.show_probes = "True" @@ -136,7 +149,8 @@ class ShowTrait(object): # Get nearest marker for composite mapping if not self.temp_trait: if check_if_attr_exists(self.this_trait, 'locus_chr') and self.dataset.type != "Geno" and self.dataset.type != "Publish": - self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset) + self.nearest_marker = get_nearest_marker( + self.this_trait, self.dataset) else: self.nearest_marker = "" @@ -159,22 +173,25 @@ class ShowTrait(object): categorical_var_list = [] if not self.temp_trait: # ZS: Only using first samplelist, since I think mapping only uses those samples - categorical_var_list = get_categorical_variables(self.this_trait, self.sample_groups[0]) + categorical_var_list = get_categorical_variables( + self.this_trait, self.sample_groups[0]) # ZS: Get list of chromosomes to select for mapping self.chr_list = [["All", -1]] for i, this_chr in enumerate(self.dataset.species.chromosomes.chromosomes): - self.chr_list.append([self.dataset.species.chromosomes.chromosomes[this_chr].name, i]) + self.chr_list.append( + [self.dataset.species.chromosomes.chromosomes[this_chr].name, i]) self.genofiles = self.dataset.group.get_genofiles() # ZS: No need to grab scales from .geno file unless it's using # a mapping method that reads .geno files - if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: + if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: if self.genofiles: self.scales_in_geno = get_genotype_scales(self.genofiles) else: - self.scales_in_geno = get_genotype_scales(self.dataset.group.name + ".geno") + self.scales_in_geno = get_genotype_scales( + self.dataset.group.name + ".geno") else: self.scales_in_geno = {} @@ -211,7 +228,8 @@ class ShowTrait(object): sample_column_width = max_samplename_width * 8 - self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups, sample_column_width, self.has_num_cases) + self.stats_table_width, self.trait_table_width = get_table_widths( + self.sample_groups, sample_column_width, self.has_num_cases) if self.num_values >= 5000: self.maf = 0.01 @@ -242,9 +260,9 @@ class ShowTrait(object): hddn['dataset'] = self.dataset.name hddn['temp_trait'] = False if self.temp_trait: - hddn['temp_trait'] = True - hddn['group'] = self.temp_group - hddn['species'] = self.temp_species + hddn['temp_trait'] = True + hddn['group'] = self.temp_group + hddn['species'] = self.temp_species hddn['use_outliers'] = False hddn['method'] = "gemma" hddn['selected_chr'] = -1 @@ -264,41 +282,46 @@ class ShowTrait(object): hddn['export_data'] = "" hddn['export_format'] = "excel" if len(self.scales_in_geno) < 2: - hddn['mapping_scale'] = self.scales_in_geno[list(self.scales_in_geno.keys())[0]][0][0] + hddn['mapping_scale'] = self.scales_in_geno[list( + self.scales_in_geno.keys())[0]][0][0] # We'll need access to this_trait and hddn in the Jinja2 # Template, so we put it inside self self.hddn = hddn - js_data = dict(trait_id = self.trait_id, - trait_symbol = trait_symbol, - short_description = short_description, - unit_type = trait_units, - dataset_type = self.dataset.type, - species = self.dataset.group.species, - scales_in_geno = self.scales_in_geno, - data_scale = self.dataset.data_scale, - sample_group_types = self.sample_group_types, - sample_lists = sample_lists, - se_exists = self.sample_groups[0].se_exists, - has_num_cases = self.has_num_cases, - attributes = self.sample_groups[0].attributes, - categorical_attr_exists = self.categorical_attr_exists, - categorical_vars = ",".join(categorical_var_list), - num_values = self.num_values, - qnorm_values = self.qnorm_vals, - zscore_values = self.z_scores, - sample_column_width = sample_column_width, - temp_uuid = self.temp_uuid) + js_data = dict(trait_id=self.trait_id, + trait_symbol=trait_symbol, + short_description=short_description, + unit_type=trait_units, + dataset_type=self.dataset.type, + species=self.dataset.group.species, + scales_in_geno=self.scales_in_geno, + data_scale=self.dataset.data_scale, + sample_group_types=self.sample_group_types, + sample_lists=sample_lists, + se_exists=self.sample_groups[0].se_exists, + has_num_cases=self.has_num_cases, + attributes=self.sample_groups[0].attributes, + categorical_attr_exists=self.categorical_attr_exists, + categorical_vars=",".join(categorical_var_list), + num_values=self.num_values, + qnorm_values=self.qnorm_vals, + zscore_values=self.z_scores, + sample_column_width=sample_column_width, + temp_uuid=self.temp_uuid) self.js_data = js_data def get_external_links(self): # ZS: There's some weirdness here because some fields don't # exist while others are empty strings - self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists(self.this_trait, 'pubmed_id') else None - self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists(self.this_trait, 'geneid') else None - self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists(self.this_trait, 'omim') else None - self.homologene_link = webqtlConfig.HOMOLOGENE_ID % self.this_trait.homologeneid if check_if_attr_exists(self.this_trait, 'homologeneid') else None + self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists( + self.this_trait, 'pubmed_id') else None + self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists( + self.this_trait, 'geneid') else None + self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists( + self.this_trait, 'omim') else None + self.homologene_link = webqtlConfig.HOMOLOGENE_ID % self.this_trait.homologeneid if check_if_attr_exists( + self.this_trait, 'homologeneid') else None self.genbank_link = None if check_if_attr_exists(self.this_trait, 'genbankid'): @@ -322,11 +345,14 @@ class ShowTrait(object): self.protein_atlas_link = webqtlConfig.PROTEIN_ATLAS_URL % self.this_trait.symbol if self.dataset.group.species == "mouse" or self.dataset.group.species == "human": - self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize()) + self.rgd_link = webqtlConfig.RGD_URL % ( + self.this_trait.symbol, self.dataset.group.species.capitalize()) if self.dataset.group.species == "mouse": - self.genemania_link = webqtlConfig.GENEMANIA_URL % ("mus-musculus", self.this_trait.symbol) + self.genemania_link = webqtlConfig.GENEMANIA_URL % ( + "mus-musculus", self.this_trait.symbol) else: - self.genemania_link = webqtlConfig.GENEMANIA_URL % ("homo-sapiens", self.this_trait.symbol) + self.genemania_link = webqtlConfig.GENEMANIA_URL % ( + "homo-sapiens", self.this_trait.symbol) if self.dataset.group.species == "mouse": self.aba_link = webqtlConfig.ABA_URL % self.this_trait.symbol @@ -344,12 +370,16 @@ class ShowTrait(object): if chr and transcript_start and transcript_end and self.this_trait.refseq_transcriptid: transcript_start = int(transcript_start*1000000) transcript_end = int(transcript_end*1000000) - self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ('mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end) + self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( + 'mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end) if self.dataset.group.species == "rat": - self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize()) - self.phenogen_link = webqtlConfig.PHENOGEN_URL % (self.this_trait.symbol) - self.genemania_link = webqtlConfig.GENEMANIA_URL % ("rattus-norvegicus", self.this_trait.symbol) + self.rgd_link = webqtlConfig.RGD_URL % ( + self.this_trait.symbol, self.dataset.group.species.capitalize()) + self.phenogen_link = webqtlConfig.PHENOGEN_URL % ( + self.this_trait.symbol) + self.genemania_link = webqtlConfig.GENEMANIA_URL % ( + "rattus-norvegicus", self.this_trait.symbol) query = """SELECT kgID, chromosome, txStart, txEnd FROM GeneList_rn33 @@ -362,12 +392,15 @@ class ShowTrait(object): kgId = chr = transcript_start = transcript_end = None if chr and transcript_start and transcript_end and kgId: - transcript_start = int(transcript_start*1000000) # Convert to bases from megabases + # Convert to bases from megabases + transcript_start = int(transcript_start*1000000) transcript_end = int(transcript_end*1000000) - self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ('rn6', kgId, chr, transcript_start, transcript_end) + self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( + 'rn6', kgId, chr, transcript_start, transcript_end) if self.this_trait.geneid and (self.dataset.group.species == "mouse" or self.dataset.group.species == "rat" or self.dataset.group.species == "human"): - self.biogps_link = webqtlConfig.BIOGPS_URL % (self.dataset.group.species, self.this_trait.geneid) + self.biogps_link = webqtlConfig.BIOGPS_URL % ( + self.dataset.group.species, self.this_trait.geneid) self.gemma_link = webqtlConfig.GEMMA_URL % self.this_trait.geneid if self.dataset.group.species == "human": @@ -388,40 +421,43 @@ class ShowTrait(object): if self.temp_trait == True: dataset_menu = data_set.datasets(this_group) else: - dataset_menu = data_set.datasets(this_group, self.dataset.group) + dataset_menu = data_set.datasets( + this_group, self.dataset.group) dataset_menu_selected = None if len(dataset_menu): if self.dataset: dataset_menu_selected = self.dataset.name - return_results_menu = (100, 200, 500, 1000, 2000, 5000, 10000, 15000, 20000) + return_results_menu = (100, 200, 500, 1000, + 2000, 5000, 10000, 15000, 20000) return_results_menu_selected = 500 - self.corr_tools = dict(dataset_menu = dataset_menu, - dataset_menu_selected = dataset_menu_selected, - return_results_menu = return_results_menu, - return_results_menu_selected = return_results_menu_selected,) + self.corr_tools = dict(dataset_menu=dataset_menu, + dataset_menu_selected=dataset_menu_selected, + return_results_menu=return_results_menu, + return_results_menu_selected=return_results_menu_selected,) def make_sample_lists(self): all_samples_ordered = self.dataset.group.all_samples_ordered() - + parent_f1_samples = [] if self.dataset.group.parlist and self.dataset.group.f1list: parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list primary_sample_names = list(all_samples_ordered) - if not self.temp_trait: other_sample_names = [] for sample in list(self.this_trait.data.keys()): if (self.this_trait.data[sample].name2 != self.this_trait.data[sample].name): if ((self.this_trait.data[sample].name2 in primary_sample_names) and - (self.this_trait.data[sample].name not in primary_sample_names)): - primary_sample_names.append(self.this_trait.data[sample].name) - primary_sample_names.remove(self.this_trait.data[sample].name2) + (self.this_trait.data[sample].name not in primary_sample_names)): + primary_sample_names.append( + self.this_trait.data[sample].name) + primary_sample_names.remove( + self.this_trait.data[sample].name2) all_samples_set = set(all_samples_ordered) if sample not in all_samples_set: @@ -440,34 +476,34 @@ class ShowTrait(object): else: primary_header = "Samples" - primary_samples = SampleList(dataset = self.dataset, - sample_names=primary_sample_names, - this_trait=self.this_trait, - sample_group_type='primary', - header=primary_header) + primary_samples = SampleList(dataset=self.dataset, + sample_names=primary_sample_names, + this_trait=self.this_trait, + sample_group_type='primary', + header=primary_header) # if other_sample_names and self.dataset.group.species != # "human" and self.dataset.group.name != "CFW": if len(other_sample_names) > 0: - other_sample_names.sort() #Sort other samples + other_sample_names.sort() # Sort other samples if parent_f1_samples: other_sample_names = parent_f1_samples + other_sample_names other_samples = SampleList(dataset=self.dataset, - sample_names=other_sample_names, - this_trait=self.this_trait, - sample_group_type='other', - header="Other") + sample_names=other_sample_names, + this_trait=self.this_trait, + sample_group_type='other', + header="Other") self.sample_groups = (primary_samples, other_samples) else: self.sample_groups = (primary_samples,) else: - primary_samples = SampleList(dataset = self.dataset, - sample_names=primary_sample_names, - this_trait=self.trait_vals, - sample_group_type='primary', - header="%s Only" % (self.dataset.group.name)) + primary_samples = SampleList(dataset=self.dataset, + sample_names=primary_sample_names, + this_trait=self.trait_vals, + sample_group_type='primary', + header="%s Only" % (self.dataset.group.name)) self.sample_groups = (primary_samples,) self.primary_sample_names = primary_sample_names @@ -622,8 +658,10 @@ def get_ncbi_summary(this_trait): # ZS: Need to switch this try/except to something that checks # the output later try: - response = requests.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid) - summary = json.loads(response.content)['result'][this_trait.geneid]['summary'] + response = requests.get( + "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid) + summary = json.loads(response.content)[ + 'result'][this_trait.geneid]['summary'] return summary except: return None @@ -639,13 +677,15 @@ def get_categorical_variables(this_trait, sample_list): attribute_vals = [] for sample_name in list(this_trait.data.keys()): if sample_list.attributes[attribute].name in this_trait.data[sample_name].extra_attributes: - attribute_vals.append(this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name]) + attribute_vals.append( + this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name]) else: attribute_vals.append("N/A") num_distinct = len(set(attribute_vals)) if num_distinct < 10: - categorical_var_list.append(sample_list.attributes[attribute].name) + categorical_var_list.append( + sample_list.attributes[attribute].name) return categorical_var_list @@ -655,7 +695,8 @@ def get_genotype_scales(genofiles): if isinstance(genofiles, list): for the_file in genofiles: file_location = the_file['location'] - geno_scales[file_location] = get_scales_from_genofile(file_location) + geno_scales[file_location] = get_scales_from_genofile( + file_location) else: geno_scales[genofiles] = get_scales_from_genofile(genofiles) |