diff options
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 15 | ||||
-rw-r--r-- | wqflask/base/species.py | 14 | ||||
-rw-r--r-- | wqflask/base/trait.py | 421 | ||||
-rw-r--r-- | wqflask/utility/authentication_tools.py | 13 | ||||
-rw-r--r-- | wqflask/utility/helper_functions.py | 27 | ||||
-rw-r--r-- | wqflask/wqflask/api/router.py | 18 | ||||
-rw-r--r-- | wqflask/wqflask/db_info.py | 22 | ||||
-rw-r--r-- | wqflask/wqflask/do_search.py | 7 | ||||
-rw-r--r-- | wqflask/wqflask/external_tools/send_to_geneweaver.py | 79 | ||||
-rw-r--r-- | wqflask/wqflask/external_tools/send_to_webgestalt.py | 82 | ||||
-rw-r--r-- | wqflask/wqflask/gsearch.py | 152 | ||||
-rw-r--r-- | wqflask/wqflask/interval_analyst/GeneUtil.py | 193 | ||||
-rw-r--r-- | wqflask/wqflask/show_trait/SampleList.py | 53 | ||||
-rw-r--r-- | wqflask/wqflask/show_trait/show_trait.py | 106 |
14 files changed, 627 insertions, 575 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index d7e747aa..b371e39f 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -1,6 +1,6 @@ import collections -from flask import g +from wqflask.database import database_connection from utility import db_tools from utility import Bunch @@ -49,7 +49,11 @@ class MrnaAssayTissueData: # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: lower_symbols[gene_symbol.lower()] = True - results = list(g.db.execute(query).fetchall()) + + results = None + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() for result in results: symbol = result[0] if symbol is not None and lower_symbols.get(symbol.lower()): @@ -81,9 +85,10 @@ class MrnaAssayTissueData: FROM TissueProbeSetXRef, TissueProbeSetData WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) - - - results = g.db.execute(query).fetchall() + results = [] + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: symbol_values_dict[result.Symbol.lower()] = [result.value] diff --git a/wqflask/base/species.py b/wqflask/base/species.py index f303aabb..0a13315c 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -2,7 +2,7 @@ from collections import OrderedDict from dataclasses import dataclass from dataclasses import InitVar from typing import Optional, Dict -from flask import g +from wqflask.database import database_connection @dataclass @@ -46,20 +46,20 @@ class Chromosomes: """Lazily fetch the chromosomes""" chromosomes = OrderedDict() if self.species is not None: - query = ( + cursor.execute( "SELECT Chr_Length.Name, Chr_Length.OrderId, Length " "FROM Chr_Length, Species WHERE " "Chr_Length.SpeciesId = Species.SpeciesId AND " - "Species.Name = " - "'%s' ORDER BY OrderId" % self.species.capitalize()) + "Species.Name = %s " + "ORDER BY OrderId", (self.species.capitalize(),)) else: - query = ( + cursor.execute( "SELECT Chr_Length.Name, Chr_Length.OrderId, " "Length FROM Chr_Length, InbredSet WHERE " "Chr_Length.SpeciesId = InbredSet.SpeciesId AND " "InbredSet.Name = " - "'%s' ORDER BY OrderId" % self.dataset.group.name) - results = g.db.execute(query).fetchall() + "%s ORDER BY OrderId", (self.dataset.group.name,)) + results = cursor.fetchall() for item in results: chromosomes[item.OrderId] = IndChromosome( item.Name, item.Length) diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index b02c6033..57cc8a9b 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -13,8 +13,9 @@ from utility.redis_tools import get_redis_conn, get_resource_id from utility.db_tools import escape from flask import g, request, url_for - from utility.logger import getLogger +from wqflask.database import database_connection + logger = getLogger(__name__) @@ -386,226 +387,220 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: the_url = GN_PROXY_URL + "run-action?resource={}&user={}&branch=data&action=view&trait={}".format( resource_id, g.user_session.user_id, trait.name) + with database_connection() as conn, conn.cursor() as cursor: + try: + response = requests.get(the_url).content + trait_info = json.loads(response) + except: # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait + trait_info = () + if dataset.type == 'Publish': + cursor.execute( + "SELECT PublishXRef.Id, InbredSet.InbredSetCode, " + "Publication.PubMed_ID, " + "CAST(Phenotype.Pre_publication_description AS BINARY), " + "CAST(Phenotype.Post_publication_description AS BINARY), " + "CAST(Phenotype.Original_description AS BINARY), " + "CAST(Phenotype.Pre_publication_abbreviation AS BINARY), " + "CAST(Phenotype.Post_publication_abbreviation AS BINARY), " + "PublishXRef.mean, Phenotype.Lab_code, " + "Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, " + "CAST(Publication.Authors AS BINARY), " + "CAST(Publication.Title AS BINARY), " + "CAST(Publication.Abstract AS BINARY), " + "CAST(Publication.Journal AS BINARY), " + "Publication.Volume, Publication.Pages, " + "Publication.Month, Publication.Year, " + "PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments FROM PublishXRef, Publication, " + "Phenotype, PublishFreeze, InbredSet WHERE " + "PublishXRef.Id = %s AND " + "Phenotype.Id = PublishXRef.PhenotypeId " + "AND Publication.Id = PublishXRef.PublicationId " + "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " + "AND PublishXRef.InbredSetId = InbredSet.Id AND " + "PublishFreeze.Id = %s", + (trait.name, dataset.id,) + ) + trait_info = cursor.fetchone() + + # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name + # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. + elif dataset.type == 'ProbeSet': + display_fields_string = ', ProbeSet.'.join(dataset.display_fields) + display_fields_string = f'ProbeSet.{display_fields_string}' + cursor.execute( + "SELECT %s FROM ProbeSet, ProbeSetFreeze, " + "ProbeSetXRef WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %s AND " + "ProbeSet.Name = %s", + (display_fields_string, dataset.name, + str(trait.name),) + ) + trait_info = cursor.fetchone() + # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name + # to avoid the problem of same marker name from different species. + elif dataset.type == 'Geno': + display_fields_string = ',Geno.'.join(dataset.display_fields) + display_fields_string = f'Geno.{display_fields_string}' + cursor.execute( + "SELECT %s FROM Geno, GenoFreeze, " + "GenoXRef WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id " + "AND GenoXRef.GenoId = Geno.Id " + "AND GenoFreeze.Name = %s " + "AND Geno.Name = %s", + (display_fields_string, dataset.name, trait.name) + ) + trait_info = cursor.fetchone() + else: # Temp type + cursor.execute( + "SELECT %s FROM %s WHERE Name = %s", + (','.join(dataset.display_fields), + dataset.type, trait.name,) + ) + trait_info = cursor.fetchone() + + if trait_info: + trait.haveinfo = True + for i, field in enumerate(dataset.display_fields): + holder = trait_info[i] + if isinstance(holder, bytes): + holder = holder.decode("utf-8", errors="ignore") + setattr(trait, field, holder) - try: - response = requests.get(the_url).content - trait_info = json.loads(response) - except: # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait - if dataset.type == 'Publish': - query = """ - SELECT - PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID, - CAST(Phenotype.Pre_publication_description AS BINARY), - CAST(Phenotype.Post_publication_description AS BINARY), - CAST(Phenotype.Original_description AS BINARY), - CAST(Phenotype.Pre_publication_abbreviation AS BINARY), - CAST(Phenotype.Post_publication_abbreviation AS BINARY), PublishXRef.mean, - Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users, - CAST(Publication.Authors AS BINARY), CAST(Publication.Title AS BINARY), CAST(Publication.Abstract AS BINARY), - CAST(Publication.Journal AS BINARY), Publication.Volume, Publication.Pages, - Publication.Month, Publication.Year, PublishXRef.Sequence, - Phenotype.Units, PublishXRef.comments - FROM - PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet - WHERE - PublishXRef.Id = %s AND - Phenotype.Id = PublishXRef.PhenotypeId AND - Publication.Id = PublishXRef.PublicationId AND - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishXRef.InbredSetId = InbredSet.Id AND - PublishFreeze.Id = %s - """ % (trait.name, dataset.id) - - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - - # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. - elif dataset.type == 'ProbeSet': - display_fields_string = ', ProbeSet.'.join(dataset.display_fields) - display_fields_string = 'ProbeSet.' + display_fields_string - query = """ - SELECT %s - FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSet.Name = '%s' - """ % (escape(display_fields_string), - escape(dataset.name), - escape(str(trait.name))) - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name - # to avoid the problem of same marker name from different species. - elif dataset.type == 'Geno': - display_fields_string = ',Geno.'.join(dataset.display_fields) - display_fields_string = 'Geno.' + display_fields_string - query = """ - SELECT %s - FROM Geno, GenoFreeze, GenoXRef - WHERE - GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoXRef.GenoId = Geno.Id AND - GenoFreeze.Name = '%s' AND - Geno.Name = '%s' - """ % (escape(display_fields_string), - escape(dataset.name), - escape(trait.name)) - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - else: # Temp type - query = """SELECT %s FROM %s WHERE Name = %s""" - logger.sql(query) - trait_info = g.db.execute(query, - ','.join(dataset.display_fields), - dataset.type, trait.name).fetchone() - - if trait_info: - trait.haveinfo = True - for i, field in enumerate(dataset.display_fields): - holder = trait_info[i] - if isinstance(holder, bytes): - holder = holder.decode("utf-8", errors="ignore") - setattr(trait, field, holder) - - if dataset.type == 'Publish': - if trait.group_code: - trait.display_name = trait.group_code + "_" + str(trait.name) - - trait.confidential = 0 - if trait.pre_publication_description and not trait.pubmed_id: - trait.confidential = 1 - - description = trait.post_publication_description - - # If the dataset is confidential and the user has access to confidential - # phenotype traits, then display the pre-publication description instead - # of the post-publication description - trait.description_display = "N/A" - if not trait.pubmed_id: - trait.abbreviation = trait.pre_publication_abbreviation - if trait.pre_publication_description: - trait.description_display = trait.pre_publication_description - else: - trait.abbreviation = trait.post_publication_abbreviation - if description: - trait.description_display = description.strip() + if dataset.type == 'Publish': + if trait.group_code: + trait.display_name = trait.group_code + "_" + str(trait.name) + + trait.confidential = 0 + if trait.pre_publication_description and not trait.pubmed_id: + trait.confidential = 1 + + description = trait.post_publication_description + + # If the dataset is confidential and the user has access to confidential + # phenotype traits, then display the pre-publication description instead + # of the post-publication description + trait.description_display = "N/A" + if not trait.pubmed_id: + trait.abbreviation = trait.pre_publication_abbreviation + if trait.pre_publication_description: + trait.description_display = trait.pre_publication_description + else: + trait.abbreviation = trait.post_publication_abbreviation + if description: + trait.description_display = description.strip() - if not trait.year.isdigit(): - trait.pubmed_text = "N/A" - else: - trait.pubmed_text = trait.year + if not trait.year.isdigit(): + trait.pubmed_text = "N/A" + else: + trait.pubmed_text = trait.year - if trait.pubmed_id: - trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id + if trait.pubmed_id: + trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id - if dataset.type == 'ProbeSet' and dataset.group: - description_string = trait.description - target_string = trait.probe_target_description + if dataset.type == 'ProbeSet' and dataset.group: + description_string = trait.description + target_string = trait.probe_target_description - if str(description_string or "") != "" and description_string != 'None': - description_display = description_string - else: - description_display = trait.symbol - - if (str(description_display or "") != "" - and description_display != 'N/A' - and str(target_string or "") != "" and target_string != 'None'): - description_display = description_display + '; ' + target_string.strip() - - # Save it for the jinja2 template - trait.description_display = description_display - - trait.location_repr = 'N/A' - if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % ( - trait.chr, float(trait.mb)) - - elif dataset.type == "Geno": - trait.location_repr = 'N/A' - if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % ( - trait.chr, float(trait.mb)) - - if get_qtl_info: - # LRS and its location - trait.LRS_score_repr = "N/A" - trait.LRS_location_repr = "N/A" - trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" - if dataset.type == 'ProbeSet' and not trait.cellid: - trait.mean = "" - query = """ - SELECT - ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive - FROM - ProbeSetXRef, ProbeSet - WHERE - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSet.Name = "{}" AND - ProbeSetXRef.ProbeSetFreezeId ={} - """.format(trait.name, dataset.id) - logger.sql(query) - trait_qtl = g.db.execute(query).fetchone() - if trait_qtl: - trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl - if trait.locus: - query = """ - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '{}' and - Geno.Name = '{}' and - Geno.SpeciesId = Species.Id - """.format(dataset.group.species, trait.locus) - logger.sql(query) - result = g.db.execute(query).fetchone() - if result: - trait.locus_chr = result[0] - trait.locus_mb = result[1] + if str(description_string or "") != "" and description_string != 'None': + description_display = description_string + else: + description_display = trait.symbol + + if (str(description_display or "") != "" + and description_display != 'N/A' + and str(target_string or "") != "" and target_string != 'None'): + description_display = description_display + '; ' + target_string.strip() + + # Save it for the jinja2 template + trait.description_display = description_display + + trait.location_repr = 'N/A' + if trait.chr and trait.mb: + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) + + elif dataset.type == "Geno": + trait.location_repr = 'N/A' + if trait.chr and trait.mb: + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) + + if get_qtl_info: + # LRS and its location + trait.LRS_score_repr = "N/A" + trait.LRS_location_repr = "N/A" + trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" + if dataset.type == 'ProbeSet' and not trait.cellid: + trait.mean = "" + cursor.execute( + "SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, " + "ProbeSetXRef.pValue, ProbeSetXRef.mean, " + "ProbeSetXRef.additive FROM ProbeSetXRef, " + "ProbeSet WHERE " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSet.Name = %s AND " + "ProbeSetXRef.ProbeSetFreezeId = %s", + (trait.name, dataset.id,) + ) + trait_qtl = cursor.fetchone() + if trait_qtl: + trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl + if trait.locus: + cursor.execute( + "SELECT Geno.Chr, Geno.Mb FROM " + "Geno, Species WHERE " + "Species.Name = %s AND " + "Geno.Name = %s AND " + "Geno.SpeciesId = Species.Id", + (dataset.group.species, trait.locus,) + ) + if result := cursor.fetchone() : + trait.locus_chr = result[0] + trait.locus_mb = result[1] + else: + trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - if dataset.type == 'Publish': - query = """ - SELECT - PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive - FROM - PublishXRef, PublishFreeze - WHERE - PublishXRef.Id = %s AND - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishFreeze.Id =%s - """ % (trait.name, dataset.id) - logger.sql(query) - trait_qtl = g.db.execute(query).fetchone() - if trait_qtl: - trait.locus, trait.lrs, trait.additive = trait_qtl - if trait.locus: - query = """ - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '{}' and - Geno.Name = '{}' and - Geno.SpeciesId = Species.Id - """.format(dataset.group.species, trait.locus) - logger.sql(query) - result = g.db.execute(query).fetchone() - if result: - trait.locus_chr = result[0] - trait.locus_mb = result[1] + if dataset.type == 'Publish': + cursor.execute( + "SELECT PublishXRef.Locus, PublishXRef.LRS, " + "PublishXRef.additive FROM " + "PublishXRef, PublishFreeze WHERE " + "PublishXRef.Id = %s AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId " + "AND PublishFreeze.Id = %s", (trait.name, dataset.id,) + ) + if trait_qtl := cursor.fetchone(): + trait.locus, trait.lrs, trait.additive = trait_qtl + if trait.locus: + cursor.execute( + "SELECT Geno.Chr, Geno.Mb FROM Geno, " + "Species WHERE Species.Name = %s " + "AND Geno.Name = %s AND " + "Geno.SpeciesId = Species.Id", + (dataset.group.species, trait.locus,) + ) + if result := cursor.fetchone(): + trait.locus_chr = result[0] + trait.locus_mb = result[1] + else: + trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - else: - trait.locus = trait.lrs = trait.additive = "" - if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and str(trait.locus_chr or "") != "" and str(trait.locus_mb or "") != "": - trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( - trait.locus_chr, float(trait.locus_mb)) - if str(trait.lrs or "") != "": - trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs - else: - raise KeyError(repr(trait.name) - + ' information is not found in the database.') - return trait + trait.locus = trait.lrs = trait.additive = "" + if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and str(trait.locus_chr or "") != "" and str(trait.locus_mb or "") != "": + trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( + trait.locus_chr, float(trait.locus_mb)) + if str(trait.lrs or "") != "": + trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs + else: + raise KeyError(repr(trait.name) + + ' information is not found in the database.') + return trait diff --git a/wqflask/utility/authentication_tools.py b/wqflask/utility/authentication_tools.py index 96cc0b00..a8c03fe2 100644 --- a/wqflask/utility/authentication_tools.py +++ b/wqflask/utility/authentication_tools.py @@ -85,12 +85,13 @@ def add_new_resource(dataset, trait_id=None): def get_group_code(dataset): - results = g.db.execute( - "SELECT InbredSetCode from InbredSet where Name='{}'".format( - dataset.group.name)).fetchone() - if results[0]: - return results[0] - else: + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT InbredSetCode FROM InbredSet WHERE Name=%s", + (dataset.group.name,) + ) + if results := cursor.fetchone(): + return results[0] return "" diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 4229a91f..0c0794dc 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -4,7 +4,7 @@ from base.species import TheSpecies from utility import hmac -from flask import g +from wqflask.database import database_connection def get_species_dataset_trait(self, start_vars): @@ -50,16 +50,19 @@ def get_trait_db_obs(self, trait_db_list): def get_species_groups(): """Group each species into a group""" _menu = {} - - for species, group_name in g.db.execute( + species, group_name = None, None + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( "SELECT s.MenuName, i.InbredSetName FROM InbredSet i " "INNER JOIN Species s ON s.SpeciesId = i.SpeciesId " - "ORDER BY i.SpeciesId ASC, i.Name ASC").fetchall(): - if species in _menu: - if _menu.get(species): - _menu = _menu[species].append(group_name) - else: - _menu[species] = [group_name] - return [{"species": key, - "groups": value} for key, value in - list(_menu.items())] + "ORDER BY i.SpeciesId ASC, i.Name ASC" + ) + for species, group_name in cursor.fetchall(): + if species in _menu: + if _menu.get(species): + _menu = _menu[species].append(group_name) + else: + _menu[species] = [group_name] + return [{"species": key, + "groups": value} for key, value in + list(_menu.items())] diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index 95cd2953..ca700083 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -999,14 +999,12 @@ def get_group_id_from_dataset(dataset_name): def get_group_id(group_name): - query = """ - SELECT InbredSet.Id - FROM InbredSet - WHERE InbredSet.Name = "{}" - """.format(group_name) - - group_id = g.db.execute(query).fetchone() - if group_id: - return group_id[0] - else: + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT InbredSet.Id FROM InbredSet " + "WHERE InbredSet.Name = %s", + (group_name,) + ) + if group_id := cursor.fetchone(): + return group_id[0] return None diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py index f5c029a6..d7ed3b9e 100644 --- a/wqflask/wqflask/db_info.py +++ b/wqflask/wqflask/db_info.py @@ -4,8 +4,7 @@ import urllib.error import urllib.parse import re -from flask import Flask, g - +from wqflask.database import database_connection class InfoPage: def __init__(self, start_vars): @@ -40,20 +39,21 @@ class InfoPage: if not all([self.gn_accession_id, self.info_page_name]): raise ValueError('No correct parameter found') - if self.gn_accession_id: - final_query = f"{query_base}GN_AccesionId = {self.gn_accession_id}" - results = g.db.execute(final_query).fetchone() - elif self.info_page_name: - final_query = f"{query_base}InfoPageName = {self.info_page_name}" - results = g.db.execute(final_query).fetchone() - + results = None + with database_connection() as conn, conn.cursor() as cursor: + if self.gn_accession_id: + cursor.execute(f"{query_base}GN_AccesionId = %s", + (self.gn_accession_id,)) + results = cursor.fetchone() + elif self.info_page_name: + cursor.execute(f"{query_base}InfoPageName = %s", + (self.info_page_name,)) + results = cursor.fetchone() if results: self.info = process_query_results(results) - if ((not results or len(results) < 1) and self.info_page_name and create): return self.get_info() - if not self.gn_accession_id and self.info: self.gn_accession_id = self.info['accession_id'] if not self.info_page_name and self.info: diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 97143486..fad6dbcf 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -3,7 +3,7 @@ import re import requests import string -from flask import Flask, g +from wqflask.database import database_connection from utility.db_tools import escape from pprint import pformat as pf @@ -37,8 +37,9 @@ class DoSearch: def execute(self, query): """Executes query and returns results""" query = self.normalize_spaces(query) - results = g.db.execute(query, no_parameters=True).fetchall() - return results + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute(query) + return cursor.fetchall() def handle_wildcard(self, str): keyword = str.strip() diff --git a/wqflask/wqflask/external_tools/send_to_geneweaver.py b/wqflask/wqflask/external_tools/send_to_geneweaver.py index a8066b43..3a95f81d 100644 --- a/wqflask/wqflask/external_tools/send_to_geneweaver.py +++ b/wqflask/wqflask/external_tools/send_to_geneweaver.py @@ -20,7 +20,7 @@ import string -from flask import Flask, g +from wqflask.database import database_connection from base.trait import GeneralTrait, retrieve_trait_info from base.species import TheSpecies @@ -68,44 +68,51 @@ def get_trait_name_list(trait_list): def test_chip(trait_list): final_chip_name = "" - - for trait_db in trait_list: - dataset = trait_db[1] - result = g.db.execute("""SELECT GeneChip.GO_tree_value - FROM GeneChip, ProbeFreeze, ProbeSetFreeze - WHERE GeneChip.Id = ProbeFreeze.ChipId and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() - - if result: - chip_name = result[0] - if chip_name: - if chip_name != final_chip_name: - if final_chip_name: - return "mixed" + with database_connection() as conn, conn.cursor() as cursor: + for trait_db in trait_list: + dataset = trait_db[1] + cursor.execute( + "SELECT GeneChip.GO_tree_value " + "FROM GeneChip, ProbeFreeze, ProbeSetFreeze " + "WHERE GeneChip.Id = ProbeFreeze.ChipId " + "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeSetFreeze.Name = %s", + (dataset.name,) + ) + + if result := cursor.fetchone: + chip_name = result[0] + if chip_name: + if chip_name != final_chip_name: + if final_chip_name: + return "mixed" + else: + final_chip_name = chip_name else: - final_chip_name = chip_name + pass else: - pass + cursor.execute( + "SELECT GeneChip.Name " + "FROM GeneChip, ProbeFreeze, ProbeSetFreeze " + "WHERE GeneChip.Id = ProbeFreeze.ChipId " + "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeSetFreeze.Name = %s", + (dataset.name,) + ) + chip_name = f'{cursor.fetchone()[0]}_NA' + return chip_name else: - result = g.db.execute("""SELECT GeneChip.Name - FROM GeneChip, ProbeFreeze, ProbeSetFreeze - WHERE GeneChip.Id = ProbeFreeze.ChipId and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() - chip_name = '%s_NA' % result[0] - return chip_name - else: - query = """SELECT GeneChip.Name - FROM GeneChip, ProbeFreeze, ProbeSetFreeze - WHERE GeneChip.Id = ProbeFreeze.ChipId and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name - result = g.db.execute(query).fetchone() - if result == None: + cursor.execute( + "SELECT GeneChip.Name FROM GeneChip, " + "ProbeFreeze, ProbeSetFreeze WHERE " + "GeneChip.Id = ProbeFreeze.ChipId " + "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeSetFreeze.Name = %s", + (dataset.name,) + ) + if result := cursor.fetchone(): + chip_name = f'{result[0]}_NA' + return chip_name return "not_microarray" - else: - chip_name = '%s_NA' % result[0] - return chip_name return chip_name diff --git a/wqflask/wqflask/external_tools/send_to_webgestalt.py b/wqflask/wqflask/external_tools/send_to_webgestalt.py index 4de684b0..0cc3fd07 100644 --- a/wqflask/wqflask/external_tools/send_to_webgestalt.py +++ b/wqflask/wqflask/external_tools/send_to_webgestalt.py @@ -20,7 +20,7 @@ import string -from flask import Flask, g +from wqflask.database import database_connection from base.trait import GeneralTrait, retrieve_trait_info from base.species import TheSpecies @@ -71,46 +71,52 @@ class SendToWebGestalt: def test_chip(trait_list): final_chip_name = "" - - for trait_db in trait_list: - dataset = trait_db[1] - result = g.db.execute("""SELECT GeneChip.GO_tree_value - FROM GeneChip, ProbeFreeze, ProbeSetFreeze - WHERE GeneChip.Id = ProbeFreeze.ChipId and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() - - if result: - chip_name = result[0] - if chip_name: - if chip_name != final_chip_name: - if final_chip_name: - return "mixed" + with database_connection() as conn, conn.cursor() as cursor: + for trait_db in trait_list: + dataset = trait_db[1] + cursor.execute( + "SELECT GeneChip.GO_tree_value " + "FROM GeneChip, ProbeFreeze, " + "ProbeSetFreeze WHERE " + "GeneChip.Id = ProbeFreeze.ChipId " + "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeSetFreeze.Name = %s", (dataset.name,) + ) + + if result := cursor.fetchone(): + chip_name = result[0] + if chip_name: + if chip_name != final_chip_name: + if final_chip_name: + return "mixed" + else: + final_chip_name = chip_name else: - final_chip_name = chip_name + pass else: - pass - else: - result = g.db.execute("""SELECT GeneChip.Name - FROM GeneChip, ProbeFreeze, ProbeSetFreeze - WHERE GeneChip.Id = ProbeFreeze.ChipId and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() - chip_name = '%s_NA' % result[0] - return chip_name - else: - query = """SELECT GeneChip.Name - FROM GeneChip, ProbeFreeze, ProbeSetFreeze - WHERE GeneChip.Id = ProbeFreeze.ChipId and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name - result = g.db.execute(query).fetchone() - if result == None: - return "not_microarray" + cursor.execute( + "SELECT GeneChip.Name FROM GeneChip, ProbeFreeze, " + "ProbeSetFreeze WHERE " + "GeneChip.Id = ProbeFreeze.ChipId AND " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeSetFreeze.Name = %s", (dataset.name,) + ) + result = cursor.fetchone() + chip_name = f'{result[0]}_NA' + return chip_name else: - chip_name = '%s_NA' % result[0] - return chip_name - + cursor.execute( + "SELECT GeneChip.Name FROM GeneChip, ProbeFreeze, " + "ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId " + "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND " + "ProbeSetFreeze.Name = %s", (dataset.name,) + ) + result = cursor.fetchone() + if not result: + return "not_microarray" + else: + chip_name = f'{result[0]}_NA' + return chip_name return chip_name diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 56877524..cd3724d9 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -2,7 +2,7 @@ import json import datetime as dt from types import SimpleNamespace -from flask import Flask, g +from wqflask.database import database_connection from base.data_set import create_dataset from base.trait import create_trait from db import webqtlDatabaseFunction @@ -26,45 +26,52 @@ class GSearch: assert(is_str(self.type)) if self.type == "gene": - sql = """ - SELECT - Species.`Name` AS species_name, - InbredSet.`Name` AS inbredset_name, - Tissue.`Name` AS tissue_name, - ProbeSetFreeze.Name AS probesetfreeze_name, - ProbeSetFreeze.FullName AS probesetfreeze_fullname, - ProbeSet.Name AS probeset_name, - ProbeSet.Symbol AS probeset_symbol, - CAST(ProbeSet.`description` AS BINARY) AS probeset_description, - ProbeSet.Chr AS chr, - ProbeSet.Mb AS mb, - ProbeSetXRef.Mean AS mean, - ProbeSetXRef.LRS AS lrs, - ProbeSetXRef.`Locus` AS locus, - ProbeSetXRef.`pValue` AS pvalue, - ProbeSetXRef.`additive` AS additive, - ProbeSetFreeze.Id AS probesetfreeze_id, - Geno.Chr as geno_chr, - Geno.Mb as geno_mb - FROM Species - INNER JOIN InbredSet ON InbredSet.`SpeciesId`=Species.`Id` - INNER JOIN ProbeFreeze ON ProbeFreeze.InbredSetId=InbredSet.`Id` - INNER JOIN Tissue ON ProbeFreeze.`TissueId`=Tissue.`Id` - INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id - INNER JOIN ProbeSetXRef ON ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id - INNER JOIN ProbeSet ON ProbeSet.Id = ProbeSetXRef.ProbeSetId - LEFT JOIN Geno ON ProbeSetXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id - WHERE ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,ProbeSet.alias,ProbeSet.GenbankId, ProbeSet.UniGeneId, ProbeSet.Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) ) - AND ProbeSetFreeze.confidentiality < 1 - AND ProbeSetFreeze.public > 0 - ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name - LIMIT 6000 - """ % (self.terms) - re = g.db.execute(sql).fetchall() + _result = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Species.`Name` AS species_name, " + "InbredSet.`Name` AS inbredset_name, " + "Tissue.`Name` AS tissue_name, " + "ProbeSetFreeze.Name AS probesetfreeze_name, " + "ProbeSetFreeze.FullName AS " + "probesetfreeze_fullname, ProbeSet.Name AS " + "probeset_name, ProbeSet.Symbol AS " + "probeset_symbol, CAST(ProbeSet.`description` AS BINARY) " + "AS probeset_description, ProbeSet.Chr AS chr, " + "ProbeSet.Mb AS mb, ProbeSetXRef.Mean AS mean, " + "ProbeSetXRef.LRS AS lrs, ProbeSetXRef.`Locus` " + "AS locus, ProbeSetXRef.`pValue` AS , " + "ProbeSetXRef.`additive` AS additive, " + "ProbeSetFreeze.Id AS probesetfreeze_id, " + "Geno.Chr as geno_chr, Geno.Mb as geno_mb " + "FROM Species INNER JOIN InbredSet ON " + "InbredSet.`SpeciesId`=Species.`Id` " + "INNER JOIN ProbeFreeze ON " + "ProbeFreeze.InbredSetId=InbredSet.`Id` " + "INNER JOIN Tissue ON ProbeFreeze.`TissueId`=Tissue.`Id` " + "INNER JOIN ProbeSetFreeze ON " + "ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id " + "INNER JOIN ProbeSetXRef ON " + "ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id " + "INNER JOIN ProbeSet ON " + "ProbeSet.Id = ProbeSetXRef.ProbeSetId " + "LEFT JOIN Geno ON ProbeSetXRef.Locus = Geno.Name " + "AND Geno.SpeciesId = Species.Id WHERE " + "( MATCH " + "(ProbeSet.Name, ProbeSet.description, ProbeSet.symbol, " + "ProbeSet.alias, ProbeSet.GenbankId, ProbeSet.UniGeneId, " + "ProbeSet.Probe_Target_Description) " + "AGAINST (%s IN BOOLEAN MODE) ) " + "AND ProbeSetFreeze.confidentiality < 1 AND " + "ProbeSetFreeze.public > 0 ORDER BY species_name, " + "inbredset_name, tissue_name, probesetfreeze_name, " + "probeset_name LIMIT 6000", (self.terms,) + ) + _result = cursor.fetchall() trait_list = [] dataset_to_permissions = {} - for i, line in enumerate(re): + for i, line in enumerate(_result): this_trait = {} this_trait['index'] = i + 1 this_trait['name'] = line[5] @@ -168,44 +175,43 @@ class GSearch: search_term = self.terms.split("_")[1] group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format( self.terms.split("_")[0]) - sql = """ - SELECT - Species.`Name`, - InbredSet.`Name`, - PublishFreeze.`Name`, - PublishFreeze.`FullName`, - PublishXRef.`Id`, - CAST(Phenotype.`Pre_publication_description` AS BINARY), - CAST(Phenotype.`Post_publication_description` AS BINARY), - Publication.`Authors`, - Publication.`Year`, - Publication.`PubMed_ID`, - PublishXRef.`LRS`, - PublishXRef.`additive`, - InbredSet.`InbredSetCode`, - PublishXRef.`mean`, - PublishFreeze.Id, - Geno.Chr as geno_chr, - Geno.Mb as geno_mb - FROM Species - INNER JOIN InbredSet ON InbredSet.`SpeciesId`=Species.`Id` - INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId`=InbredSet.`Id` - INNER JOIN PublishXRef ON PublishXRef.`InbredSetId`=InbredSet.`Id` - INNER JOIN Phenotype ON PublishXRef.`PhenotypeId`=Phenotype.`Id` - INNER JOIN Publication ON PublishXRef.`PublicationId`=Publication.`Id` - LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id - WHERE - ( - (MATCH (Phenotype.Post_publication_description, Phenotype.Pre_publication_description, Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code) AGAINST ('{1}' IN BOOLEAN MODE) ) - OR (MATCH (Publication.Abstract, Publication.Title, Publication.Authors) AGAINST ('{1}' IN BOOLEAN MODE) ) + _result = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Species.`Name`, InbredSet.`Name`, " + "PublishFreeze.`Name`, PublishFreeze.`FullName`, " + "PublishXRef.`Id`, CAST(Phenotype.`Pre_publication_description` " + "AS BINARY), CAST(Phenotype.`Post_publication_description` " + "AS BINARY), Publication.`Authors`, Publication.`Year`, " + "Publication.`PubMed_ID`, PublishXRef.`LRS`, " + "PublishXRef.`additive`, InbredSet.`InbredSetCode`, " + "PublishXRef.`mean`, PublishFreeze.Id, Geno.Chr as geno_chr, " + "Geno.Mb as geno_mb FROM Species " + "INNER JOIN InbredSet ON InbredSet.`SpeciesId`=Species.`Id` " + "INNER JOIN PublishFreeze ON " + "PublishFreeze.`InbredSetId`=InbredSet.`Id` " + "INNER JOIN PublishXRef ON " + "PublishXRef.`InbredSetId`=InbredSet.`Id` " + "INNER JOIN Phenotype ON " + "PublishXRef.`PhenotypeId`=Phenotype.`Id` " + "INNER JOIN Publication ON " + "PublishXRef.`PublicationId`=Publication.`Id` " + "LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name " + "AND Geno.SpeciesId = Species.Id WHERE " + "((MATCH (Phenotype.Post_publication_description, " + "Phenotype.Pre_publication_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code) AGAINST (%s IN BOOLEAN MODE) ) " + "OR (MATCH (Publication.Abstract, Publication.Title, " + "Publication.Authors) AGAINST (%s IN BOOLEAN MODE) ) " + ") %s ORDER BY Species.`Name`, InbredSet.`Name`, " + "PublishXRef.`Id` LIMIT 6000", + (search_term, search_term, group_clause,) ) - {0} - ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id` - LIMIT 6000 - """.format(group_clause, search_term) - re = g.db.execute(sql).fetchall() + _result = cursor.fetchall() trait_list = [] - for i, line in enumerate(re): + for i, line in enumerate(_result): trait_dict = {} trait_dict['index'] = i + 1 trait_dict['name'] = str(line[4]) diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index 5e86ae31..6adf528f 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -1,11 +1,9 @@ import string -from flask import Flask, g +from wqflask.database import database_connection # Just return a list of dictionaries # each dictionary contains sub-dictionary - - def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', @@ -13,94 +11,101 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): # List All Species in the Gene Table speciesDict = {} - results = g.db.execute(""" - SELECT Species.Name, GeneList.SpeciesId - FROM Species, GeneList - WHERE GeneList.SpeciesId = Species.Id - GROUP BY GeneList.SpeciesId""").fetchall() - - for item in results: - speciesDict[item[0]] = item[1] - - # List current Species and other Species - speciesId = speciesDict[species] - otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] - otherSpecies.remove([species, speciesId]) - - results = g.db.execute(""" - SELECT %s FROM GeneList - WHERE SpeciesId = %d AND - Chromosome = '%s' AND - ((TxStart > %f and TxStart <= %f) OR (TxEnd > %f and TxEnd <= %f)) - ORDER BY txStart - """ % (", ".join(fetchFields), - speciesId, chrName, - startMb, endMb, - startMb, endMb)).fetchall() - - GeneList = [] - - if results: - for result in results: - newdict = {} - for j, item in enumerate(fetchFields): - newdict[item] = result[j] - # count SNPs if possible - if diffCol and species == 'mouse': - newdict["snpCount"] = g.db.execute(""" - SELECT count(*) - FROM BXDSnpPosition - WHERE Chr = '%s' AND - Mb >= %2.6f AND Mb < %2.6f AND - StrainId1 = %d AND StrainId2 = %d - """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict["snpDensity"] = newdict["snpCount"] / \ - (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0 - else: - newdict["snpDensity"] = newdict["snpCount"] = 0 - - try: - newdict['GeneLength'] = 1000.0 * \ - (newdict['TxEnd'] - newdict['TxStart']) - except: - pass - - # load gene from other Species by the same name - for item in otherSpecies: - othSpec, othSpecId = item - newdict2 = {} - - resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), - othSpecId, - newdict["GeneSymbol"])).fetchone() - - if resultsOther: - for j, item in enumerate(fetchFields): - newdict2[item] = resultsOther[j] - - # count SNPs if possible, could be a separate function - if diffCol and othSpec == 'mouse': - newdict2["snpCount"] = g.db.execute(""" - SELECT count(*) - FROM BXDSnpPosition - WHERE Chr = '%s' AND - Mb >= %2.6f AND Mb < %2.6f AND - StrainId1 = %d AND StrainId2 = %d - """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - - newdict2["snpDensity"] = newdict2["snpCount"] / \ - (newdict2["TxEnd"] - newdict2["TxStart"]) / 1000.0 - else: - newdict2["snpDensity"] = newdict2["snpCount"] = 0 - - try: - newdict2['GeneLength'] = 1000.0 * \ - (newdict2['TxEnd'] - newdict2['TxStart']) - except: - pass - - newdict['%sGene' % othSpec] = newdict2 - - GeneList.append(newdict) - - return GeneList + results = [] + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute("SELECT Species.Name, GeneList.SpeciesId" + "FROM Species, GeneList WHERE " + "GeneList.SpeciesId = Species.Id " + "GROUP BY GeneList.SpeciesId") + results = cursor.fetchall() + for item in results: + speciesDict[item[0]] = item[1] + + # List current Species and other Species + speciesId = speciesDict[species] + otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] + otherSpecies.remove([species, speciesId]) + cursor.execute("SELECT %s FROM GeneList " + "WHERE SpeciesId = %d AND " + "Chromosome = %s AND " + "((TxStart > %s and TxStart <= %s) " + "OR (TxEnd > %s and TxEnd <= %s)) " + "ORDER BY txStart", + (", ".join(fetchFields), + speciesId, chrName, + startMb, endMb, + startMb, endMb)) + results = cursor.fetchall() + + GeneList = [] + if results: + for result in results: + newdict = {} + for j, item in enumerate(fetchFields): + newdict[item] = result[j] + # count SNPs if possible + if diffCol and species == 'mouse': + cursor.execute( + "SELECT count(*) FROM BXDSnpPosition " + "WHERE Chr = '%s' AND " + "Mb >= %s AND Mb < %s " + "AND StrainId1 = %s AND StrainId2 = %s", + (chrName, f"{newdict['TxStart']:2.6f}", + f"{newdict['TxEnd']:2.6f}", + diffCol[0], diffCol[1],)) + newdict["snpCount"] = cursor.fetchone()[0] + newdict["snpDensity"] = ( + newdict["snpCount"] / + (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0) + else: + newdict["snpDensity"] = newdict["snpCount"] = 0 + try: + newdict['GeneLength'] = 1000.0 * \ + (newdict['TxEnd'] - newdict['TxStart']) + except: + pass + # load gene from other Species by the same name + for item in otherSpecies: + othSpec, othSpecId = item + newdict2 = {} + cursor.execute( + "SELECT %s FROM GeneList WHERE " + "SpeciesId = %s AND " + "geneSymbol= '%s' LIMIT 1", + (", ".join(fetchFields), + othSpecId, + newdict["GeneSymbol"])) + resultsOther = cursor.fetchone() + if resultsOther: + for j, item in enumerate(fetchFields): + newdict2[item] = resultsOther[j] + + # count SNPs if possible, could be a separate function + if diffCol and othSpec == 'mouse': + cursor.execute( + "SELECT count(*) FROM BXDSnpPosition " + "WHERE Chr = %s AND Mb >= %2.6f AND " + "Mb < %2.6f AND StrainId1 = %d " + "AND StrainId2 = %d", + (chrName, f"{newdict['TxStart']:2.6f}", + f"{newdict['TxEnd']:2.6f}", + diffCol[0], diffCol[1])) + if snp_count := cursor.fetchone(): + newdict2["snpCount"] = snp_count[0] + + newdict2["snpDensity"] = ( + newdict2["snpCount"] + / (newdict2["TxEnd"] - newdict2["TxStart"]) + / 1000.0) + else: + newdict2["snpDensity"] = newdict2["snpCount"] = 0 + try: + newdict2['GeneLength'] = ( + 1000.0 * (newdict2['TxEnd'] - newdict2['TxStart'])) + except: + pass + + newdict['%sGene' % othSpec] = newdict2 + + GeneList.append(newdict) + return GeneList diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index ae30aa59..cc6b8e23 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -1,7 +1,7 @@ import re import itertools -from flask import g +from wqflask.database import database_connection from base import webqtlCaseData, webqtlConfig from pprint import pformat as pf @@ -123,15 +123,20 @@ class SampleList: """Finds which extra attributes apply to this dataset""" # Get attribute names and distinct values for each attribute - results = g.db.execute(''' - SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttribute.Description, CaseAttributeXRefNew.Value - FROM CaseAttribute, CaseAttributeXRefNew - WHERE CaseAttributeXRefNew.CaseAttributeId = CaseAttribute.Id - AND CaseAttributeXRefNew.InbredSetId = %s - ORDER BY CaseAttribute.Id''', (str(self.dataset.group.id),)) - + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT DISTINCT CaseAttribute.Id, " + "CaseAttribute.Name, CaseAttribute.Description, " + "CaseAttributeXRefNew.Value FROM " + "CaseAttribute, CaseAttributeXRefNew WHERE " + "CaseAttributeXRefNew.CaseAttributeId = CaseAttribute.Id " + "AND CaseAttributeXRefNew.InbredSetId = %s " + "ORDER BY CaseAttribute.Id", (str(self.dataset.group.id),) + ) self.attributes = {} - for attr, values in itertools.groupby(results.fetchall(), lambda row: (row.Id, row.Name, row.Description)): + for attr, values in itertools.groupby( + cursor.fetchall(), lambda row: (row.Id, row.Name, row.Description) + ): key, name, description = attr self.attributes[key] = Bunch() self.attributes[key].id = key @@ -156,19 +161,23 @@ class SampleList: def get_extra_attribute_values(self): if self.attributes: - query = ''' - SELECT Strain.Name AS SampleName, CaseAttributeId AS Id, CaseAttributeXRefNew.Value - FROM Strain, StrainXRef, InbredSet, CaseAttributeXRefNew - WHERE StrainXRef.StrainId = Strain.Id - AND InbredSet.Id = StrainXRef.InbredSetId - AND CaseAttributeXRefNew.StrainId = Strain.Id - AND InbredSet.Id = CaseAttributeXRefNew.InbredSetId - AND CaseAttributeXRefNew.InbredSetId = %s - ORDER BY SampleName''' % self.dataset.group.id - - results = g.db.execute(query) - - for sample_name, items in itertools.groupby(results.fetchall(), lambda row: row.SampleName): + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name AS SampleName, " + "CaseAttributeId AS Id, " + "CaseAttributeXRefNew.Value FROM Strain, " + "StrainXRef, InbredSet, CaseAttributeXRefNew " + "WHERE StrainXRef.StrainId = Strain.Id " + "AND InbredSet.Id = StrainXRef.InbredSetId " + "AND CaseAttributeXRefNew.StrainId = Strain.Id " + "AND InbredSet.Id = CaseAttributeXRefNew.InbredSetId " + "AND CaseAttributeXRefNew.InbredSetId = %s " + "ORDER BY SampleName", (self.dataset.group.id,) + ) + + for sample_name, items in itertools.groupby( + cursor.fetchall(), lambda row: row.SampleName + ): attribute_values = {} # Make a list of attr IDs without values (that have values for other samples) valueless_attr_ids = [self.attributes[key].id for key in self.attributes.keys()] diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index c1d9ad84..b12b6ba0 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -11,7 +11,7 @@ from collections import OrderedDict import numpy as np import scipy.stats as ss -from flask import g +from wqflask.database import database_connection from base import webqtlConfig from wqflask.show_trait.SampleList import SampleList @@ -84,14 +84,21 @@ class ShowTrait: blatsequence = self.this_trait.blatseq if not blatsequence: # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. - query1 = """SELECT Probe.Sequence, Probe.Name - FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSet.Name = '%s' AND - Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name) - seqs = g.db.execute(query1).fetchall() + seqs = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Probe.Sequence, Probe.Name " + "FROM Probe, ProbeSet, ProbeSetFreeze, " + "ProbeSetXRef WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %s AND " + "ProbeSet.Name = %s AND " + "Probe.ProbeSetId = ProbeSet.Id ORDER " + "BY Probe.SerialOrder", + (self.this_trait.dataset.name, self.this_trait.name,) + ) + seqs = cursor.fetchall() if not seqs: raise ValueError else: @@ -104,15 +111,20 @@ class ShowTrait: blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A' # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. - query2 = """SELECT Probe.Sequence, Probe.Name - FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSet.Name = '%s' AND - Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name) - - seqs = g.db.execute(query2).fetchall() + seqs = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Probe.Sequence, Probe.Name " + "FROM Probe, ProbeSet, ProbeSetFreeze, " + "ProbeSetXRef WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %s AND ProbeSet.Name = %s " + "AND Probe.ProbeSetId = ProbeSet.Id " + "ORDER BY Probe.SerialOrder", + (self.this_trait.dataset.name, self.this_trait.name,) + ) + seqs = cursor.fetchall() for seqt in seqs: if int(seqt[1][-1]) % 2 == 1: blatsequence += '%3EProbe_' + \ @@ -378,12 +390,14 @@ class ShowTrait: if self.dataset.group.species == "mouse": self.aba_link = webqtlConfig.ABA_URL % self.this_trait.symbol - - query = """SELECT chromosome, txStart, txEnd - FROM GeneList - WHERE geneSymbol = '{}'""".format(self.this_trait.symbol) - - results = g.db.execute(query).fetchone() + results = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT chromosome, txStart, txEnd FROM " + "GeneList WHERE geneSymbol = %s", + (self.this_trait.symbol,) + ) + results = cursor.fetchone() if results: chr, transcript_start, transcript_end = results else: @@ -403,15 +417,17 @@ class ShowTrait: self.genemania_link = webqtlConfig.GENEMANIA_URL % ( "rattus-norvegicus", self.this_trait.symbol) - query = """SELECT kgID, chromosome, txStart, txEnd - FROM GeneList_rn33 - WHERE geneSymbol = '{}'""".format(self.this_trait.symbol) - - results = g.db.execute(query).fetchone() - if results: - kgId, chr, transcript_start, transcript_end = results - else: - kgId = chr = transcript_start = transcript_end = None + results = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT kgID, chromosome, txStart, txEnd " + "FROM GeneList_rn33 WHERE geneSymbol = %s", + (self.this_trait.symbol,) + ) + if results := cursor.fetchone(): + kgId, chr, transcript_start, transcript_end = results + else: + kgId = chr = transcript_start = transcript_end = None if chr and transcript_start and transcript_end and kgId: # Convert to bases from megabases @@ -605,19 +621,19 @@ def get_nearest_marker(this_trait, this_db): this_mb = this_trait.locus_mb # One option is to take flanking markers, another is to take the # two (or one) closest - query = """SELECT Geno.Name - FROM Geno, GenoXRef, GenoFreeze - WHERE Geno.Chr = '{}' AND - GenoXRef.GenoId = Geno.Id AND - GenoFreeze.Id = GenoXRef.GenoFreezeId AND - GenoFreeze.Name = '{}' - ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name + "Geno", this_mb) - result = g.db.execute(query).fetchall() - - if result == []: + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Geno.Name FROM Geno, GenoXRef, " + "GenoFreeze WHERE Geno.Chr = %s AND " + "GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Id = GenoXRef.GenoFreezeId " + "AND GenoFreeze.Name = %s ORDER BY " + "ABS( Geno.Mb - %s) LIMIT 1", + (this_chr, f"{this_db.group.name}Geno", this_mb,) + ) + if result := cursor.fetchall(): + return result[0][0] return "" - else: - return result[0][0] def get_table_widths(sample_groups, sample_column_width, has_num_cases=False): |