diff options
-rw-r--r-- | wqflask/base/data_set.py | 711 | ||||
-rw-r--r-- | wqflask/wqflask/api/router.py | 902 |
2 files changed, 810 insertions, 803 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 2f4c1154..ade38131 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -141,18 +141,18 @@ class DatasetType: sql_query_mapping = { 'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM " "ProbeSetFreeze WHERE " - "ProbeSetFreeze.Name = \"%s\" "), + "ProbeSetFreeze.Name = %s "), 'pheno': ("SELECT InfoFiles.GN_AccesionId " "FROM InfoFiles, PublishFreeze, InbredSet " - "WHERE InbredSet.Name = '%s' AND " + "WHERE InbredSet.Name = %s AND " "PublishFreeze.InbredSetId = InbredSet.Id AND " "InfoFiles.InfoPageName = PublishFreeze.Name"), 'other_pheno': ("SELECT PublishFreeze.Name " "FROM PublishFreeze, InbredSet " - "WHERE InbredSet.Name = '%s' AND " + "WHERE InbredSet.Name = %s AND " "PublishFreeze.InbredSetId = InbredSet.Id"), 'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE " - "GenoFreeze.Name = \"%s\" ") + "GenoFreeze.Name = %s ") } dataset_name_mapping = { @@ -166,13 +166,14 @@ class DatasetType: if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") - results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() - if results: - self.datasets[name] = dataset_name_mapping[t] - self.redis_instance.set( - "dataset_structure", json.dumps(self.datasets)) - return True - return None + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute(sql_query_mapping[t], (group_name,)) + if cursor.fetchone(): + self.datasets[name] = dataset_name_mapping[t] + self.redis_instance.set( + "dataset_structure", json.dumps(self.datasets)) + return True + def __call__(self, name): if name not in self.datasets: @@ -348,9 +349,13 @@ class DatasetGroup: self.genofile = None def get_mapping_methods(self): - - mapping_id = g.db.execute( - "select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0] + mapping_id = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT MappingMethodId FROM " + "InbredSet WHERE Name= %s" + (self.name,)) + mapping_id = cursor.fetchone()[0] if mapping_id == "1": mapping_names = ["GEMMA", "QTLReaper", "R/qtl"] elif mapping_id == "2": @@ -589,29 +594,38 @@ class DataSet: } def get_accession_id(self): - if self.type == "Publish": - results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where - InbredSet.Name = %s and - PublishFreeze.InbredSetId = InbredSet.Id and - InfoFiles.InfoPageName = PublishFreeze.Name and - PublishFreeze.public > 0 and - PublishFreeze.confidentiality < 1 order by - PublishFreeze.CreateTime desc""", (self.group.name)).fetchone() - elif self.type == "Geno": - results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where - InbredSet.Name = %s and - GenoFreeze.InbredSetId = InbredSet.Id and - InfoFiles.InfoPageName = GenoFreeze.ShortName and - GenoFreeze.public > 0 and - GenoFreeze.confidentiality < 1 order by - GenoFreeze.CreateTime desc""", (self.group.name)).fetchone() - else: - results = None + results = None + with database_connection() as conn, conn.cursor() as cursor: + if self.type == "Publish": + cursor.execute( + "SELECT InfoFiles.GN_AccesionId FROM " + "InfoFiles, PublishFreeze, InbredSet " + "WHERE InbredSet.Name = %s AND " + "PublishFreeze.InbredSetId = InbredSet.Id " + "AND InfoFiles.InfoPageName = PublishFreeze.Name " + "AND PublishFreeze.public > 0 AND " + "PublishFreeze.confidentiality < 1 " + "ORDER BY PublishFreeze.CreateTime DESC", + (self.group.name,) + ) + results = cursor.fetchone() + elif self.type == "Geno": + cursor.execute( + "SELECT InfoFiles.GN_AccesionId FROM " + "InfoFiles, GenoFreeze, InbredSet " + "WHERE InbredSet.Name = %s AND " + "GenoFreeze.InbredSetId = InbredSet.Id " + "AND InfoFiles.InfoPageName = GenoFreeze.ShortName " + "AND GenoFreeze.public > 0 AND " + "GenoFreeze.confidentiality < 1 " + "ORDER BY GenoFreeze.CreateTime DESC", + (self.group.name,) + ) + results = cursor.fetchone() - if results != None: + if results: return str(results[0]) - else: - return "None" + return "None" def retrieve_other_names(self): """This method fetches the the dataset names in search_result. @@ -624,52 +638,49 @@ class DataSet: all is passed. """ - - try: - query = "" - _vars = None - query_args = (self.name, self.name, self.name) - if self.type == "ProbeSet": - query = ( - "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " - "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, " - "ProbeSetFreeze.DataScale, Tissue.Name " - "FROM ProbeSetFreeze, ProbeFreeze, Tissue " - "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " - "AND ProbeFreeze.TissueId = Tissue.Id " - "AND (ProbeSetFreeze.Name = %s OR ProbeSetFreeze.FullName = %s OR ProbeSetFreeze.ShortName = %s)") - else: - query = ( - "SELECT Id, Name, FullName, ShortName " - f"FROM {self.type}Freeze " - "WHERE (Name = %s OR FullName = %s OR ShortName = %s)") - self.tissue = "N/A" - with database_connection() as conn, conn.cursor() as cursor: - cursor.execute(query, query_args) - _vars = cursor.fetchone() + with database_connection() as conn, conn.cursor() as cursor: + try: if self.type == "ProbeSet": + cursor.execute( + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " + "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, " + "ProbeSetFreeze.DataScale, Tissue.Name " + "FROM ProbeSetFreeze, ProbeFreeze, Tissue " + "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeFreeze.TissueId = Tissue.Id " + "AND (ProbeSetFreeze.Name = %s OR " + "ProbeSetFreeze.FullName = %s " + "OR ProbeSetFreeze.ShortName = %s)", + (self.name,)*3) (self.id, self.name, self.fullname, self.shortname, - self.data_scale, self.tissue) = _vars + self.data_scale, self.tissue) = cursor.fetchone() else: - self.id, self.name, self.fullname, self.shortname = _vars - except TypeError: - pass + cursor.execute( + "SELECT Id, Name, FullName, ShortName " + f"FROM {self.type}Freeze " + "WHERE (Name = %s OR FullName = " + "%s OR ShortName = %s)", + (self.name,)*3) + (self.id, self.name, self.fullname, self.shortname, + self.data_scale, self.tissue) = cursor.fetchone() + self.tissue = "N/A" + except TypeError: + pass def chunk_dataset(self, dataset, n): results = {} - - query = """ - SELECT ProbeSetXRef.DataId,ProbeSet.Name - FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze - WHERE ProbeSetFreeze.Name = '{}' AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id - """.format(self.name) - - # should cache this - - traits_name_dict = dict(g.db.execute(query).fetchall()) + traits_name_dict = () + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT ProbeSetXRef.DataId,ProbeSet.Name " + "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze " + "WHERE ProbeSetFreeze.Name = %s AND " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id", + (self.name,)) + # should cache this + traits_name_dict = dict(cursor.fetchall()) for i in range(0, len(dataset), n): matrix = list(dataset[i:i + n]) @@ -691,31 +702,35 @@ class DataSet: if self.group.parlist != None and self.group.f1list != None: if (self.group.parlist + self.group.f1list) in self.samplelist: self.samplelist += self.group.parlist + self.group.f1list - - query = """ - SELECT Strain.Name, Strain.Id FROM Strain, Species - WHERE Strain.Name IN {} - and Strain.SpeciesId=Species.Id - and Species.name = '{}' - """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) - results = dict(g.db.execute(query).fetchall()) - sample_ids = [results[item] for item in self.samplelist] - - sorted_samplelist = [strain_name for strain_name, strain_id in sorted( - results.items(), key=lambda item: item[1])] - - query = """SELECT * from ProbeSetData - where StrainID in {} - and id in (SELECT ProbeSetXRef.DataId - FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name) - - query_results = list(g.db.execute(query).fetchall()) - data_results = self.chunk_dataset(query_results, len(sample_ids)) - self.samplelist = sorted_samplelist - self.trait_data = data_results + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, Strain.Id FROM " + "Strain, Species WHERE Strain.Name IN " + f"{create_in_clause(self.samplelist)} " + "AND Strain.SpeciesId=Species.Id AND " + "Species.name = %s", (self.group.species,) + ) + results = dict(cursor.fetchall()) + sample_ids = [results[item] for item in self.samplelist] + + sorted_samplelist = [strain_name for strain_name, strain_id in sorted( + results.items(), key=lambda item: item[1])] + + cursor.execute( + "SELECT * from ProbeSetData WHERE StrainID IN " + f"{create_in_clause(sample_ids)} AND id IN " + "(SELECT ProbeSetXRef.DataId FROM " + "(ProbeSet, ProbeSetXRef, ProbeSetFreeze) " + "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s AND " + "ProbeSet.Id = ProbeSetXRef.ProbeSetId)", + (self.name,) + ) + + query_results = list(cursor.fetchall()) + data_results = self.chunk_dataset(query_results, len(sample_ids)) + self.samplelist = sorted_samplelist + self.trait_data = data_results def get_trait_data(self, sample_list=None): if sample_list: @@ -727,81 +742,84 @@ class DataSet: if (self.group.parlist + self.group.f1list) in self.samplelist: self.samplelist += self.group.parlist + self.group.f1list - query = """ - SELECT Strain.Name, Strain.Id FROM Strain, Species - WHERE Strain.Name IN {} - and Strain.SpeciesId=Species.Id - and Species.name = '{}' - """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) - results = dict(g.db.execute(query).fetchall()) - sample_ids = [results.get(item) - for item in self.samplelist if item is not None] - - # MySQL limits the number of tables that can be used in a join to 61, - # so we break the sample ids into smaller chunks - # Postgres doesn't have that limit, so we can get rid of this after we transition - chunk_size = 50 - number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) - - cached_results = fetch_cached_results(self.name, self.type, self.samplelist) - if cached_results is None: - trait_sample_data = [] - for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): - if self.type == "Publish": - dataset_type = "Phenotype" - else: - dataset_type = self.type - temp = ['T%s.value' % item for item in sample_ids_step] - if self.type == "Publish": - query = "SELECT {}XRef.Id,".format(escape(self.type)) - else: - query = "SELECT {}.Name,".format(escape(dataset_type)) + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, Strain.Id FROM Strain, Species " + f"WHERE Strain.Name IN {create_in_clause(self.samplelist)} " + "AND Strain.SpeciesId=Species.Id " + "AND Species.name = %s", + (self.group.species,) + ) + results = dict(cursor.fetchall()) + sample_ids = [results.get(item) + for item in self.samplelist if item is not None] + + # MySQL limits the number of tables that can be used in a join to 61, + # so we break the sample ids into smaller chunks + # Postgres doesn't have that limit, so we can get rid of this after we transition + chunk_size = 50 + number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) + + cached_results = fetch_cached_results(self.name, self.type, self.samplelist) + + if cached_results is None: + trait_sample_data = [] + for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): + if self.type == "Publish": + dataset_type = "Phenotype" + else: + dataset_type = self.type + temp = ['T%s.value' % item for item in sample_ids_step] + if self.type == "Publish": + query = "SELECT {}XRef.Id,".format(escape(self.type)) + else: + query = "SELECT {}.Name,".format(escape(dataset_type)) + data_start_pos = 1 + query += ', '.join(temp) + query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, + self.type, + self.type)) + + for item in sample_ids_step: + query += """ + left join {}Data as T{} on T{}.Id = {}XRef.DataId + and T{}.StrainId={}\n + """.format(*mescape(self.type, item, item, self.type, item, item)) + + if self.type == "Publish": + query += """ + WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(self.type, self.type, self.type, self.name, + dataset_type, self.type, dataset_type, dataset_type)) + else: + query += """ + WHERE {}XRef.{}FreezeId = {}Freeze.Id + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(self.type, self.type, self.type, self.type, + self.name, dataset_type, self.type, self.type, dataset_type)) + cursor.execute(query) + results = cursor.fetchall() + trait_sample_data.append([list(result) for result in results]) + + trait_count = len(trait_sample_data[0]) + self.trait_data = collections.defaultdict(list) + data_start_pos = 1 - query += ', '.join(temp) - query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, - self.type, - self.type)) - - for item in sample_ids_step: - query += """ - left join {}Data as T{} on T{}.Id = {}XRef.DataId - and T{}.StrainId={}\n - """.format(*mescape(self.type, item, item, self.type, item, item)) - - if self.type == "Publish": - query += """ - WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId - and {}Freeze.Name = '{}' - and {}.Id = {}XRef.{}Id - order by {}.Id - """.format(*mescape(self.type, self.type, self.type, self.name, - dataset_type, self.type, dataset_type, dataset_type)) - else: - query += """ - WHERE {}XRef.{}FreezeId = {}Freeze.Id - and {}Freeze.Name = '{}' - and {}.Id = {}XRef.{}Id - order by {}.Id - """.format(*mescape(self.type, self.type, self.type, self.type, - self.name, dataset_type, self.type, self.type, dataset_type)) - - results = g.db.execute(query).fetchall() - trait_sample_data.append([list(result) for result in results]) - - trait_count = len(trait_sample_data[0]) - self.trait_data = collections.defaultdict(list) - - data_start_pos = 1 - for trait_counter in range(trait_count): - trait_name = trait_sample_data[0][trait_counter][0] - for chunk_counter in range(int(number_chunks)): - self.trait_data[trait_name] += ( - trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) - - cache_dataset_results( - self.name, self.type, self.samplelist, self.trait_data) - else: - self.trait_data = cached_results + for trait_counter in range(trait_count): + trait_name = trait_sample_data[0][trait_counter][0] + for chunk_counter in range(int(number_chunks)): + self.trait_data[trait_name] += ( + trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) + + cache_dataset_results( + self.name, self.type, self.samplelist, self.trait_data) + else: + self.trait_data = cached_results class PhenotypeDataSet(DataSet): @@ -897,43 +915,41 @@ SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType, InbredSet.InbredSetC this_trait.LRS_location_repr = "N/A" if this_trait.lrs: - query = """ - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, this_trait.locus) - result = g.db.execute(query).fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( - LRS_Chr, float(LRS_Mb)) + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Geno.Chr, Geno.Mb FROM " + "Geno, Species WHERE " + "Species.Name = %s AND " + "Geno.Name = %s AND " + "Geno.SpeciesId = Species.Id", + (species, this_trait.locus,) + ) + if result := cursor.fetchone(): + if result[0] and result[1]: + LRS_Chr, LRS_Mb = result[0], result[1] + this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( + LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): - query = """ - SELECT - Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2 - FROM - (PublishData, Strain, PublishXRef, PublishFreeze) - left join PublishSE on - (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) - left join NStrain on - (NStrain.DataId = PublishData.Id AND - NStrain.StrainId = PublishData.StrainId) - WHERE - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND - PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id - Order BY - Strain.Name - """ - results = g.db.execute(query, (trait, self.id)).fetchall() - return results + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, PublishData.value, " + "PublishSE.error, NStrain.count, " + "Strain.Name2 FROM (PublishData, Strain, " + "PublishXRef, PublishFreeze) LEFT JOIN " + "PublishSE ON " + "(PublishSE.DataId = PublishData.Id " + "AND PublishSE.StrainId = PublishData.StrainId) " + "LEFT JOIN NStrain ON " + "(NStrain.DataId = PublishData.Id AND " + "NStrain.StrainId = PublishData.StrainId) " + "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " + "AND PublishData.Id = PublishXRef.DataId AND " + "PublishXRef.Id = %s AND PublishFreeze.Id = %s " + "AND PublishData.StrainId = Strain.Id " + "ORDER BY Strain.Name", (trait, self.id)) + return cursor.fetchall() class GenotypeDataSet(DataSet): @@ -976,25 +992,25 @@ GenoFreeze.Name = %s""" this_trait.chr, float(this_trait.mb)) def retrieve_sample_data(self, trait): - query = """ - SELECT - Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 - FROM - (GenoData, GenoFreeze, Strain, Geno, GenoXRef) - left join GenoSE on - (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) - WHERE - Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND - GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoFreeze.Name = %s AND - GenoXRef.DataId = GenoData.Id AND - GenoData.StrainId = Strain.Id - Order BY - Strain.Name - """ - results = g.db.execute(query, - (webqtlDatabaseFunction.retrieve_species_id(self.group.name), - trait, self.name)).fetchall() + results = [] + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, GenoData.value, " + "GenoSE.error, 'N/A', Strain.Name2 " + "FROM (GenoData, GenoFreeze, Strain, Geno, " + "GenoXRef) LEFT JOIN GenoSE ON " + "(GenoSE.DataId = GenoData.Id AND " + "GenoSE.StrainId = GenoData.StrainId) " + "WHERE Geno.SpeciesId = %s AND " + "Geno.Name = %s AND GenoXRef.GenoId = Geno.Id " + "AND GenoXRef.GenoFreezeId = GenoFreeze.Id " + "AND GenoFreeze.Name = %s AND " + "GenoXRef.DataId = GenoData.Id " + "AND GenoData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (webqtlDatabaseFunction.retrieve_species_id(self.group.name), + trait, self.name,)) + results = list(cursor.fetchall()) if self.group.name in webqtlUtil.ParInfo: f1_1, f1_2, ref, nonref = webqtlUtil.ParInfo[self.group.name] @@ -1073,114 +1089,113 @@ ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND ProbeSetFreeze.Name = %s""" # Note: setting trait_list to [] is probably not a great idea. if not trait_list: trait_list = [] + with database_connection() as conn, conn.cursor() as cursor: + for this_trait in trait_list: - for this_trait in trait_list: - - if not this_trait.haveinfo: - this_trait.retrieveInfo(QTL=1) - - if not this_trait.symbol: - this_trait.symbol = "N/A" - - # XZ, 12/08/2008: description - # XZ, 06/05/2009: Rob asked to add probe target description - description_string = str( - str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = str( - str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') - - if len(description_string) > 1 and description_string != 'None': - description_display = description_string - else: - description_display = this_trait.symbol - - if (len(description_display) > 1 and description_display != 'N/A' - and len(target_string) > 1 and target_string != 'None'): - description_display = description_display + '; ' + target_string.strip() - - # Save it for the jinja2 template - this_trait.description_display = description_display - - if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( - this_trait.chr, float(this_trait.mb)) - - # Get mean expression value - query = ( - """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' - """ % (escape(str(this_trait.dataset.id)), - escape(this_trait.name))) - result = g.db.execute(query).fetchone() + if not this_trait.haveinfo: + this_trait.retrieveInfo(QTL=1) - mean = result[0] if result else 0 + if not this_trait.symbol: + this_trait.symbol = "N/A" - if mean: - this_trait.mean = "%2.3f" % mean + # XZ, 12/08/2008: description + # XZ, 06/05/2009: Rob asked to add probe target description + description_string = str( + str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') + target_string = str( + str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') - # LRS and its location - this_trait.LRS_score_repr = 'N/A' - this_trait.LRS_location_repr = 'N/A' - - # Max LRS and its Locus location - if this_trait.lrs and this_trait.locus: - query = """ - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '{}' and - Geno.Name = '{}' and - Geno.SpeciesId = Species.Id - """.format(species, this_trait.locus) - result = g.db.execute(query).fetchone() - - if result: - lrs_chr, lrs_mb = result - this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( - lrs_chr, float(lrs_mb)) + if len(description_string) > 1 and description_string != 'None': + description_display = description_string + else: + description_display = this_trait.symbol + + if (len(description_display) > 1 and description_display != 'N/A' + and len(target_string) > 1 and target_string != 'None'): + description_display = description_display + '; ' + target_string.strip() + + # Save it for the jinja2 template + this_trait.description_display = description_display + + if this_trait.chr and this_trait.mb: + this_trait.location_repr = 'Chr%s: %.6f' % ( + this_trait.chr, float(this_trait.mb)) + + # Get mean expression value + cursor.execute( + "SELECT ProbeSetXRef.mean FROM " + "ProbeSetXRef, ProbeSet WHERE " + "ProbeSetXRef.ProbeSetFreezeId = %s " + "AND ProbeSet.Id = ProbeSetXRef.ProbeSetId " + "AND ProbeSet.Name = %s", + (str(this_trait.dataset.id), this_trait.name,) + ) + result = cursor.fetchone() + + mean = result[0] if result else 0 + + if mean: + this_trait.mean = "%2.3f" % mean + + # LRS and its location + this_trait.LRS_score_repr = 'N/A' + this_trait.LRS_location_repr = 'N/A' + + # Max LRS and its Locus location + if this_trait.lrs and this_trait.locus: + cursor.execute( + "SELECT Geno.Chr, Geno.Mb FROM " + "Geno, Species WHERE " + "Species.Name = %s AND " + "Geno.Name = %s AND " + "Geno.SpeciesId = Species.Id", + (species, this_trait.locus,) + ) + if result := cursor.fetchone(): + lrs_chr, lrs_mb = result + this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( + lrs_chr, float(lrs_mb)) return trait_list def retrieve_sample_data(self, trait): - query = """ - SELECT - Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 - FROM - (ProbeSetData, ProbeSetFreeze, - Strain, ProbeSet, ProbeSetXRef) - left join ProbeSetSE on - (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) - left join NStrain on - (NStrain.DataId = ProbeSetData.Id AND - NStrain.StrainId = ProbeSetData.StrainId) - WHERE - ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSetXRef.DataId = ProbeSetData.Id AND - ProbeSetData.StrainId = Strain.Id - Order BY - Strain.Name - """ % (escape(trait), escape(self.name)) - results = g.db.execute(query).fetchall() - return results + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, ProbeSetData.value, " + "ProbeSetSE.error, NStrain.count, " + "Strain.Name2 FROM (ProbeSetData, " + "ProbeSetFreeze, Strain, ProbeSet, " + "ProbeSetXRef) LEFT JOIN ProbeSetSE ON " + "(ProbeSetSE.DataId = ProbeSetData.Id AND " + "ProbeSetSE.StrainId = ProbeSetData.StrainId) " + "LEFT JOIN NStrain ON " + "(NStrain.DataId = ProbeSetData.Id AND " + "NStrain.StrainId = ProbeSetData.StrainId) " + "WHERE ProbeSet.Name = %s AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s AND " + "ProbeSetXRef.DataId = ProbeSetData.Id " + "AND ProbeSetData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (trait, self.name) + ) + return cursor.fetchall() def retrieve_genes(self, column_name): - query = """ - select ProbeSet.Name, ProbeSet.%s - from ProbeSet,ProbeSetXRef - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSetXRef.ProbeSetId=ProbeSet.Id; - """ % (column_name, escape(str(self.id))) - results = g.db.execute(query).fetchall() - - return dict(results) + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + f"SELECT ProbeSet.Name, ProbeSet.{column_name} " + "FROM ProbeSet,ProbeSetXRef WHERE " + "ProbeSetXRef.ProbeSetFreezeId = %s " + "AND ProbeSetXRef.ProbeSetId=ProbeSet.Id", + (str(self.id),)) + return dict(cursor.fetchall()) class TempDataSet(DataSet): - '''Temporary user-generated data set''' - + """Temporary user-generated data set""" DS_NAME_MAP['Temp'] = 'TempDataSet' def setup(self): @@ -1202,30 +1217,15 @@ class TempDataSet(DataSet): def geno_mrna_confidentiality(ob): - dataset_table = ob.type + "Freeze" - - query = '''SELECT Id, Name, FullName, confidentiality, - AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) - result = g.db.execute(query) - - (dataset_id, - name, - full_name, - confidential, - authorized_users) = result.fetchall()[0] - - if confidential: - return True - result = g.db.execute(query) - - (dataset_id, - name, - full_name, - confidential, - authorized_users) = result.fetchall()[0] - - if confidential: - return True + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT confidentiality, " + "AuthorisedUsers FROM %s WHERE Name = %s", + (f"{ob.type}Freeze", ob.name,) + ) + result = cursor.fetchall()[0] + if result: + return True def parse_db_url(): @@ -1239,16 +1239,15 @@ def query_table_timestamp(dataset_type: str): """function to query the update timestamp of a given dataset_type""" # computation data and actions - - fetch_db_name = parse_db_url() - query_update_time = f""" - SELECT UPDATE_TIME FROM information_schema.tables - WHERE TABLE_SCHEMA = '{fetch_db_name[-1]}' - AND TABLE_NAME = '{dataset_type}Data' - """ - - date_time_obj = g.db.execute(query_update_time).fetchone()[0] - return date_time_obj.strftime("%Y-%m-%d %H:%M:%S") + with database_connection() as conn, conn.cursor() as cursor: + fetch_db_name = parse_db_url() + cursor.execute( + "SELECT UPDATE_TIME FROM " + "information_schema.tables " + f"WHERE TABLE_SCHEMA = '{fetch_db_name[-1]}' " + f"AND TABLE_NAME = '{dataset_type}Data'") + date_time_obj = cursor.fetchone()[0] + return date_time_obj.strftime("%Y-%m-%d %H:%M:%S") def generate_hash_file(dataset_name: str, dataset_type: str, dataset_timestamp: str, samplelist: str): diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index ca700083..7335ec4f 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -12,7 +12,7 @@ from zipfile import ZipFile, ZIP_DEFLATED import flask from flask import current_app -from flask import g +from wqflask.database import database_connection from flask import request from flask import make_response from flask import send_file @@ -36,62 +36,72 @@ def hello_world(): @app.route("/api/v_{}/species".format(version)) def get_species_list(): - results = g.db.execute( - "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") - the_species = results.fetchall() species_list = [] - for species in the_species: - species_dict = { + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species" + ) + for species in cursor.fetchall(): + species_dict = { "Id": species[0], "Name": species[1], "FullName": species[2], "TaxonomyId": species[3] } species_list.append(species_dict) - return flask.jsonify(species_list) @app.route("/api/v_{}/species/<path:species_name>".format(version)) @app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version)) def get_species_info(species_name, file_format="json"): - results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId - FROM Species - WHERE (Name="{0}" OR FullName="{0}" OR SpeciesName="{0}");""".format(species_name)) - - the_species = results.fetchone() - species_dict = { - "Id": the_species[0], - "Name": the_species[1], - "FullName": the_species[2], - "TaxonomyId": the_species[3] - } + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId " + "FROM Species WHERE (Name=%s OR FullName=%s " + "OR SpeciesName=%s)", ((species_name,)*3)) + _species = cursor.fetchone() + species_dict = { + "Id": _species[0], + "Name": _species[1], + "FullName": _species[2], + "TaxonomyId": _species[3] + } - return flask.jsonify(species_dict) + return flask.jsonify(species_dict) @app.route("/api/v_{}/groups".format(version)) @app.route("/api/v_{}/groups/<path:species_name>".format(version)) def get_groups_list(species_name=None): - if species_name: - results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, - InbredSet.Name, InbredSet.FullName, InbredSet.public, - IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None") - FROM InbredSet, Species - WHERE InbredSet.SpeciesId = Species.Id AND - (Species.Name = "{0}" OR - Species.FullName="{0}" OR - Species.SpeciesName="{0}");""".format(species_name)) - else: - results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, - InbredSet.Name, InbredSet.FullName, InbredSet.public, - IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None") - FROM InbredSet;""") - - the_groups = results.fetchall() - if the_groups: + _groups = () + with database_connection() as conn, conn.cursor() as cursor: + if species_name: + cursor.execute( + "SELECT InbredSet.InbredSetId, " + "InbredSet.SpeciesId, InbredSet.InbredSetName, " + "InbredSet.Name, InbredSet.FullName, " + "InbredSet.public, IFNULL(InbredSet.MappingMethodId, " + "'None'), IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet, Species WHERE " + "InbredSet.SpeciesId = Species.Id AND " + "(Species.Name = %s OR Species.FullName=%s " + "OR Species.SpeciesName=%s)", ((species_name,) * 3) + ) + else: + cursor.execute( + "SELECT InbredSet.InbredSetId, " + "InbredSet.SpeciesId, InbredSet.InbredSetName, " + "InbredSet.Name, InbredSet.FullName, " + "InbredSet.public, IFNULL(InbredSet.MappingMethodId, " + "'None'), IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet" + ) + _groups = cursor.fetchall() + + if _groups: groups_list = [] - for group in the_groups: + for group in _groups: group_dict = { "Id": group[0], "SpeciesId": group[1], @@ -103,10 +113,8 @@ def get_groups_list(species_name=None): "GeneticType": group[7] } groups_list.append(group_dict) - return flask.jsonify(groups_list) - else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @app.route("/api/v_{}/group/<path:group_name>".format(version)) @@ -114,26 +122,37 @@ def get_groups_list(species_name=None): @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version)) def get_group_info(group_name, species_name=None, file_format="json"): - if species_name: - results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, - InbredSet.Name, InbredSet.FullName, InbredSet.public, - IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None") - FROM InbredSet, Species - WHERE InbredSet.SpeciesId = Species.Id AND - (InbredSet.InbredSetName = "{0}" OR - InbredSet.Name = "{0}" OR - InbredSet.FullName = "{0}") AND - (Species.Name = "{1}" OR - Species.FullName="{1}" OR - Species.SpeciesName="{1}");""".format(group_name, species_name)) - else: - results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, - InbredSet.Name, InbredSet.FullName, InbredSet.public, - IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None") - FROM InbredSet - WHERE (InbredSet.InbredSetName = "{0}" OR - InbredSet.Name = "{0}" OR - InbredSet.FullName = "{0}");""".format(group_name)) + group = () + with database_connection() as conn, conn.cursor() as cursor: + if species_name: + cursor.execute( + "SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, " + "InbredSet.InbredSetName, InbredSet.Name, " + "InbredSet.FullName, InbredSet.public, " + "IFNULL(InbredSet.MappingMethodId, 'None'), " + "IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet, Species WHERE " + "InbredSet.SpeciesId = Species.Id " + "AND (InbredSet.InbredSetName = %s OR " + "InbredSet.Name = %s OR InbredSet.FullName = %s) " + "AND (Species.Name = %s OR " + "Species.FullName = %s OR Species.SpeciesName = %s)", + (*(group_name,)*3, *(species_name,)*3) + ) + else: + cursor.execute( + "SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, " + "InbredSet.InbredSetName, InbredSet.Name, " + "InbredSet.FullName, InbredSet.public, " + "IFNULL(InbredSet.MappingMethodId, 'None'), " + "IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet WHERE " + "(InbredSet.InbredSetName = %s OR " + "InbredSet.Name = %s OR " + "InbredSet.FullName = %s)", + ((group_name,)*3) + ) + results = cursor.fetchone() group = results.fetchone() if group: @@ -156,36 +175,48 @@ def get_group_info(group_name, species_name=None, file_format="json"): @app.route("/api/v_{}/datasets/<path:group_name>".format(version)) @app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version)) def get_datasets_for_group(group_name, species_name=None): - if species_name: - results = g.db.execute(""" - SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID, - ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, - ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, - ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale - FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species - WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND - ProbeFreeze.InbredSetId = InbredSet.Id AND - (InbredSet.Name = "{0}" OR InbredSet.InbredSetName = "{0}" OR InbredSet.FullName = "{0}") AND - InbredSet.SpeciesId = Species.Id AND - (Species.SpeciesName = "{1}" OR Species.MenuName = "{1}" OR Species.FullName = "{1}"); - """.format(group_name, species_name)) - else: - results = g.db.execute(""" - SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID, - ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, - ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, - ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale - FROM ProbeSetFreeze, ProbeFreeze, InbredSet - WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND - ProbeFreeze.InbredSetId = InbredSet.Id AND - (InbredSet.Name = "{0}" OR InbredSet.InbredSetName = "{0}" OR InbredSet.FullName = "{0}"); - """.format(group_name)) - - the_datasets = results.fetchall() - - if the_datasets: + _datasets = () + with database_connection() as conn, conn.cursor() as cursor: + if species_name: + cursor.execute( + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, " + "ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, " + "ProbeSetFreeze.Name2, " + "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, " + "ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, " + "ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale " + "FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species " + "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeFreeze.InbredSetId = InbredSet.Id " + "AND (InbredSet.Name = %s OR " + "InbredSet.InbredSetName = %s OR " + "InbredSet.FullName = %s) AND " + "InbredSet.SpeciesId = Species.Id AND " + "(Species.SpeciesName = %s OR " + "Species.MenuName = %s OR Species.FullName = %s);", + (*(group_name,)*3, *(species_name)*3) + ) + else: + cursor.execute( + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, " + "ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, " + "ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, " + "ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, " + "ProbeSetFreeze.public, ProbeSetFreeze.confidentiality, " + "ProbeSetFreeze.DataScale FROM ProbeSetFreeze, " + "ProbeFreeze, InbredSet WHERE " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeFreeze.InbredSetId = InbredSet.Id " + "AND (InbredSet.Name = %s OR " + "InbredSet.InbredSetName = %s OR " + "InbredSet.FullName = %s)", + ((group_name,) * 3) + ) + _datasets = cursor.fetchall() + + if _datasets: datasets_list = [] - for dataset in the_datasets: + for dataset in _datasets: dataset_dict = { "Id": dataset[0], "ProbeFreezeId": dataset[1], @@ -213,8 +244,10 @@ def get_datasets_for_group(group_name, species_name=None): def get_dataset_info(dataset_name, group_name=None, file_format="json"): # ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets - datasets_list = [] # ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice - + # ZS: I figure I might as well return a list if there are multiple + # matches, though I don"t know if this will actually happen in + # practice + datasets_list, dataset_dict = [], {} probeset_query = """ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId, @@ -237,71 +270,71 @@ def get_dataset_info(dataset_name, group_name=None, file_format="json"): (ProbeSetFreeze.Name = "{0}" OR ProbeSetFreeze.Name2 = "{0}" OR ProbeSetFreeze.FullName = "{0}" OR ProbeSetFreeze.ShortName = "{0}") """.format(dataset_name) + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute(f"{probeset_query}{where_statement}") - probeset_query += where_statement - probeset_results = g.db.execute(probeset_query) - dataset = probeset_results.fetchone() - - if dataset: - dataset_dict = { - "dataset_type": "mRNA expression", - "id": dataset[0], - "name": dataset[1], - "full_name": dataset[2], - "short_name": dataset[3], - "data_scale": dataset[4], - "tissue_id": dataset[5], - "tissue": dataset[6], - "public": dataset[7], - "confidential": dataset[8] - } - - datasets_list.append(dataset_dict) - - if group_name: - pheno_query = """ - SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description, - Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description, - Publication.PubMed_ID, Publication.Title, Publication.Year - FROM PublishXRef, Phenotype, Publication, InbredSet, PublishFreeze - WHERE PublishXRef.InbredSetId = InbredSet.Id AND - PublishXRef.PhenotypeId = Phenotype.Id AND - PublishXRef.PublicationId = Publication.Id AND - PublishFreeze.InbredSetId = InbredSet.Id AND - PublishFreeze.public > 0 AND - PublishFreeze.confidentiality < 1 AND - InbredSet.Name = "{0}" AND PublishXRef.Id = "{1}" - """.format(group_name, dataset_name) - - pheno_results = g.db.execute(pheno_query) - dataset = pheno_results.fetchone() - - if dataset: - if dataset[5]: - dataset_dict = { - "dataset_type": "phenotype", - "id": dataset[0], - "name": dataset[1], - "description": dataset[2], - "pubmed_id": dataset[5], - "title": dataset[6], - "year": dataset[7] - } - elif dataset[4]: - dataset_dict = { - "dataset_type": "phenotype", - "id": dataset[0], - "name": dataset[3], - "description": dataset[4] - } - else: - dataset_dict = { - "dataset_type": "phenotype", - "id": dataset[0] - } + if dataset := cursor.fetchone(): + dataset_dict = { + "dataset_type": "mRNA expression", + "id": dataset[0], + "name": dataset[1], + "full_name": dataset[2], + "short_name": dataset[3], + "data_scale": dataset[4], + "tissue_id": dataset[5], + "tissue": dataset[6], + "public": dataset[7], + "confidential": dataset[8] + } datasets_list.append(dataset_dict) + if group_name: + cursor.execute( + "SELECT PublishXRef.Id, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Post_publication_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Pre_publication_description, " + "Publication.PubMed_ID, Publication.Title, " + "Publication.Year FROM PublishXRef, Phenotype, " + "Publication, InbredSet, PublishFreeze WHERE " + "PublishXRef.InbredSetId = InbredSet.Id " + "AND PublishXRef.PhenotypeId = Phenotype.Id " + "AND PublishXRef.PublicationId = Publication.Id " + "AND PublishFreeze.InbredSetId = InbredSet.Id " + "AND PublishFreeze.public > 0 AND " + "PublishFreeze.confidentiality < 1 " + "AND InbredSet.Name = %s AND PublishXRef.Id = %s", + (group_name, dataset_name,) + ) + + if dataset := cursor.fetchone(): + if dataset[5]: + dataset_dict = { + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[1], + "description": dataset[2], + "pubmed_id": dataset[5], + "title": dataset[6], + "year": dataset[7] + } + elif dataset[4]: + dataset_dict = { + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[3], + "description": dataset[4] + } + else: + dataset_dict = { + "dataset_type": "phenotype", + "id": dataset[0] + } + + datasets_list.append(dataset_dict) + if len(datasets_list) > 1: return flask.jsonify(datasets_list) elif len(datasets_list) == 1: @@ -403,44 +436,48 @@ def fetch_traits(dataset_name, file_format="json"): if 'limit_to' in request.args: limit_number = request.args['limit_to'] query += "LIMIT " + str(limit_number) - - if file_format == "json": - filename = dataset_name + "_traits.json" - - final_query = query.format(dataset_id) - - result_list = [] - for result in g.db.execute(final_query).fetchall(): - trait_dict = {} - for i, field in enumerate(field_list): - if result[i]: - trait_dict[field] = result[i] - result_list.append(trait_dict) - - return flask.jsonify(result_list) - elif file_format == "csv": - filename = dataset_name + "_traits.csv" - - results_list = [] - header_list = [] - header_list += field_list - results_list.append(header_list) - - final_query = query.format(dataset_id) - for result in g.db.execute(final_query).fetchall(): - results_list.append(result) - - si = io.StringIO() - csv_writer = csv.writer(si) - csv_writer.writerows(results_list) - output = make_response(si.getvalue()) - output.headers["Content-Disposition"] = "attachment; filename=" + filename - output.headers["Content-type"] = "text/csv" - return output - else: - return return_error(code=400, source=request.url_rule.rule, title="Invalid Output Format", details="Current formats available are JSON and CSV, with CSV as default") + with database_connection() as conn, conn.cursor() as cursor: + if file_format == "json": + filename = dataset_name + "_traits.json" + cursor.execute(query.format(dataset_id)) + for result in cursor.fetchall(): + trait_dict = {} + for i, field in enumerate(field_list): + if result[i]: + trait_dict[field] = result[i] + result_list.append(trait_dict) + return flask.jsonify(result_list) + elif file_format == "csv": + filename = dataset_name + "_traits.csv" + + results_list = [] + header_list = [] + header_list += field_list + results_list.append(header_list) + cursor.execute(query.format(dataset_id)) + for result in cursor.fetchall(): + results_list.append(result) + + si = io.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results_list) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + return output + else: + return return_error( + code=400, + source=request.url_rule.rule, + title="Invalid Output Format", + details="Current formats available are JSON and CSV, with CSV as default" + ) else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + return return_error( + code=204, + source=request.url_rule.rule, + title="No Results", + details="") @app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version)) @@ -521,20 +558,21 @@ def all_sample_data(dataset_name, file_format="csv"): header_list.append("id") header_list += sample_list results_list.append(header_list) - for i, trait_id in enumerate(trait_ids): - line_list = [] - line_list.append(str(trait_names[i])) - final_query = query.format(dataset_id, trait_id) - results = g.db.execute(final_query).fetchall() - results_dict = {} - for item in results: - results_dict[item[0]] = item[2] - for sample in sample_list: - if sample in results_dict: - line_list.append(results_dict[sample]) - else: - line_list.append("x") - results_list.append(line_list) + with database_connection() as conn, conn.cursor() as cursor: + for i, trait_id in enumerate(trait_ids): + line_list = [] + line_list.append(str(trait_names[i])) + cursor.execute(query.format(dataset_id, trait_id)) + results = cursor.fetchall() + results_dict = {} + for item in results: + results_dict[item[0]] = item[2] + for sample in sample_list: + if sample in results_dict: + line_list.append(results_dict[sample]) + else: + line_list.append("x") + results_list.append(line_list) results_list = list(map(list, zip(*results_list))) @@ -554,73 +592,26 @@ def all_sample_data(dataset_name, file_format="csv"): @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) def trait_sample_data(dataset_name, trait_name, file_format="json"): - probeset_query = """ - SELECT - Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error - FROM - (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) - LEFT JOIN ProbeSetSE ON - (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) - WHERE - ProbeSet.Name = "{0}" AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetFreeze.Name = "{1}" AND - ProbeSetXRef.DataId = ProbeSetData.Id AND - ProbeSetData.StrainId = Strain.Id - ORDER BY - Strain.Name - """.format(trait_name, dataset_name) - - probeset_results = g.db.execute(probeset_query) - - sample_data = probeset_results.fetchall() - if len(sample_data) > 0: - sample_list = [] - for sample in sample_data: - sample_dict = { - "sample_name": sample[0], - "sample_name_2": sample[1], - "value": sample[2], - "data_id": sample[3], - } - if sample[4]: - sample_dict["se"] = sample[4] - sample_list.append(sample_dict) + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, Strain.Name2, " + "ProbeSetData.value, ProbeSetData.Id, " + "ProbeSetSE.error FROM (ProbeSetData, " + "ProbeSetFreeze, Strain, ProbeSet, " + "ProbeSetXRef) LEFT JOIN ProbeSetSE ON " + "(ProbeSetSE.DataId = ProbeSetData.Id AND " + "ProbeSetSE.StrainId = ProbeSetData.StrainId) " + "WHERE ProbeSet.Name = %s AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s AND " + "ProbeSetXRef.DataId = ProbeSetData.Id " + "AND ProbeSetData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (trait_name, dataset_name,) + ) - return flask.jsonify(sample_list) - else: - if not dataset_name.isdigit(): - group_id = get_group_id(dataset_name) - if group_id: - dataset_or_group = group_id - else: - dataset_or_group = dataset_name - else: - dataset_or_group = dataset_name - - pheno_query = """ - SELECT DISTINCT - Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count - FROM - (PublishData, Strain, PublishXRef, PublishFreeze) - LEFT JOIN PublishSE ON - (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) - LEFT JOIN NStrain ON - (NStrain.DataId = PublishData.Id AND - NStrain.StrainId = PublishData.StrainId) - WHERE - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = "{1}" AND - (PublishFreeze.Id = "{0}" OR PublishFreeze.Name = "{0}" OR - PublishFreeze.ShortName = "{0}" OR PublishXRef.InbredSetId = "{0}") AND - PublishData.StrainId = Strain.Id - ORDER BY - Strain.Name - """.format(dataset_or_group, trait_name) - - pheno_results = g.db.execute(pheno_query) - - sample_data = pheno_results.fetchall() + sample_data = cursor.fetchall() if len(sample_data) > 0: sample_list = [] for sample in sample_data: @@ -628,17 +619,61 @@ def trait_sample_data(dataset_name, trait_name, file_format="json"): "sample_name": sample[0], "sample_name_2": sample[1], "value": sample[2], - "data_id": sample[3] + "data_id": sample[3], } if sample[4]: sample_dict["se"] = sample[4] - if sample[5]: - sample_dict["n_cases"] = sample[5] sample_list.append(sample_dict) return flask.jsonify(sample_list) else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + if not dataset_name.isdigit(): + group_id = get_group_id(dataset_name) + if group_id: + dataset_or_group = group_id + else: + dataset_or_group = dataset_name + else: + dataset_or_group = dataset_name + + cursor.execute( + "SELECT DISTINCT Strain.Name, Strain.Name2, " + "PublishData.value, PublishData.Id, PublishSE.error, " + "NStrain.count FROM (PublishData, Strain, " + "PublishXRef, PublishFreeze) LEFT JOIN " + "PublishSE ON (PublishSE.DataId = PublishData.Id " + "AND PublishSE.StrainId = PublishData.StrainId) " + "LEFT JOIN NStrain ON " + "(NStrain.DataId = PublishData.Id AND " + "NStrain.StrainId = PublishData.StrainId) " + "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " + "AND PublishData.Id = PublishXRef.DataId AND " + "PublishXRef.Id = %s AND (PublishFreeze.Id = %s " + "OR PublishFreeze.Name = %s OR " + "PublishFreeze.ShortName = %s OR " + "PublishXRef.InbredSetId = %s) AND " + "PublishData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (trait_name, *(dataset_or_group,)*4) + ) + if len(sample_data := cursor.fetchall()) > 0: + sample_list = [] + for sample in sample_data: + sample_dict = { + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3] + } + if sample[4]: + sample_dict["se"] = sample[4] + if sample[5]: + sample_dict["n_cases"] = sample[5] + sample_list.append(sample_dict) + + return flask.jsonify(sample_list) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version)) @@ -646,70 +681,64 @@ def trait_sample_data(dataset_name, trait_name, file_format="json"): @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) def get_trait_info(dataset_name, trait_name, file_format="json"): - probeset_query = """ - SELECT - ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, - ProbeSetXRef.mean, ProbeSetXRef.se, ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.additive - FROM - ProbeSet, ProbeSetXRef, ProbeSetFreeze - WHERE - ProbeSet.Name = "{0}" AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetFreeze.Name = "{1}" - """.format(trait_name, dataset_name) - - probeset_results = g.db.execute(probeset_query) - - trait_info = probeset_results.fetchone() - if trait_info: - trait_dict = { - "id": trait_info[0], - "name": trait_info[1], - "symbol": trait_info[2], - "description": trait_info[3], - "chr": trait_info[4], - "mb": trait_info[5], - "alias": trait_info[6], - "mean": trait_info[7], - "se": trait_info[8], - "locus": trait_info[9], - "lrs": trait_info[10], - "p_value": trait_info[11], - "additive": trait_info[12] - } - - return flask.jsonify(trait_dict) - else: - # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" - if "Publish" in dataset_name: - dataset_name = dataset_name.replace("Publish", "") - - group_id = get_group_id(dataset_name) - pheno_query = """ - SELECT - PublishXRef.PhenotypeId, PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive - FROM - PublishXRef - WHERE - PublishXRef.Id = "{0}" AND - PublishXRef.InbredSetId = "{1}" - """.format(trait_name, group_id) - - pheno_results = g.db.execute(pheno_query) - - trait_info = pheno_results.fetchone() - if trait_info: + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, " + "ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, " + "ProbeSet.alias, ProbeSetXRef.mean, ProbeSetXRef.se, " + "ProbeSetXRef.Locus, ProbeSetXRef.LRS, " + "ProbeSetXRef.pValue, ProbeSetXRef.additive " + "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze " + "WHERE ProbeSet.Name = %s AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s", + (trait_name, dataset_name,) + ) + if trait_info := cursor.fetchone(): trait_dict = { "id": trait_info[0], - "locus": trait_info[1], - "lrs": trait_info[2], - "additive": trait_info[3] + "name": trait_info[1], + "symbol": trait_info[2], + "description": trait_info[3], + "chr": trait_info[4], + "mb": trait_info[5], + "alias": trait_info[6], + "mean": trait_info[7], + "se": trait_info[8], + "locus": trait_info[9], + "lrs": trait_info[10], + "p_value": trait_info[11], + "additive": trait_info[12] } return flask.jsonify(trait_dict) else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + if "Publish" in dataset_name: + dataset_name = dataset_name.replace("Publish", "") + + group_id = get_group_id(dataset_name) + cursor.execute( + "SELECT PublishXRef.PhenotypeId, " + "PublishXRef.Locus, PublishXRef.LRS, " + "PublishXRef.additive FROM " + "PublishXRef WHERE " + "PublishXRef.Id = %s AND " + "PublishXRef.InbredSetId = %s", + (trait_name, group_id,) + ) + if trait_info := cursor.fetchone(): + trait_dict = { + "id": trait_info[0], + "locus": trait_info[1], + "lrs": trait_info[2], + "additive": trait_info[3] + } + + return flask.jsonify(trait_dict) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @app.route("/api/v_{}/correlation".format(version), methods=("GET",)) @@ -872,129 +901,108 @@ def get_dataset_trait_ids(dataset_name, start_vars): limit_string = "LIMIT " + str(start_vars['limit_to']) else: limit_string = "" + with database_connection() as conn, conn.cursor() as cursor: + if "Geno" in dataset_name: + data_type = "Geno" # ZS: Need to pass back the dataset type + cursor.execute( + "SELECT GenoXRef.GenoId, Geno.Name, " + "GenoXRef.GenoFreezeId FROM Geno, " + "GenoXRef, GenoFreeze WHERE " + "Geno.Id = GenoXRef.GenoId AND " + "GenoXRef.GenoFreezeId = GenoFreeze.Id " + f"AND GenoFreeze.Name = %s {limit_string}", + (dataset_name,)) + + results = cursor.fetchall() + + trait_ids = [result[0] for result in results] + trait_names = [result[1] for result in results] + dataset_id = results[0][2] + return trait_ids, trait_names, data_type, dataset_id + + elif "Publish" in dataset_name or get_group_id(dataset_name): + data_type = "Publish" + dataset_name = dataset_name.replace("Publish", "") + dataset_id = get_group_id(dataset_name) + cursor.execute( + "SELECT PublishXRef.PhenotypeId, " + "PublishXRef.Id, InbredSet.InbredSetCode " + "FROM PublishXRef, InbredSet WHERE " + "PublishXRef.InbredSetId = %s AND " + "InbredSet.Id = PublishXRef.InbredSetId " + f"{limit_string}", + (dataset_id,) + ) + results = cursor.fetchall() + + trait_ids = [result[0] for result in results] + trait_names = [str(result[2]) + "_" + str(result[1]) + for result in results] + + return trait_ids, trait_names, data_type, dataset_id - if "Geno" in dataset_name: - data_type = "Geno" # ZS: Need to pass back the dataset type - query = """ - SELECT - GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId - FROM - Geno, GenoXRef, GenoFreeze - WHERE - Geno.Id = GenoXRef.GenoId AND - GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoFreeze.Name = "{0}" - {1} - """.format(dataset_name, limit_string) - - results = g.db.execute(query).fetchall() - - trait_ids = [result[0] for result in results] - trait_names = [result[1] for result in results] - dataset_id = results[0][2] - return trait_ids, trait_names, data_type, dataset_id - - elif "Publish" in dataset_name or get_group_id(dataset_name): - data_type = "Publish" - dataset_name = dataset_name.replace("Publish", "") - dataset_id = get_group_id(dataset_name) - - query = """ - SELECT - PublishXRef.PhenotypeId, PublishXRef.Id, InbredSet.InbredSetCode - FROM - PublishXRef, InbredSet - WHERE - PublishXRef.InbredSetId = "{0}" AND - InbredSet.Id = PublishXRef.InbredSetId - {1} - """.format(dataset_id, limit_string) - - results = g.db.execute(query).fetchall() - - trait_ids = [result[0] for result in results] - trait_names = [str(result[2]) + "_" + str(result[1]) - for result in results] - - return trait_ids, trait_names, data_type, dataset_id - - else: - data_type = "ProbeSet" - query = """ - SELECT - ProbeSetXRef.ProbeSetId, ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId - FROM - ProbeSet, ProbeSetXRef, ProbeSetFreeze - WHERE - ProbeSet.Id = ProbeSetXRef.ProbeSetId AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetFreeze.Name = "{0}" - {1} - """.format(dataset_name, limit_string) - - results = g.db.execute(query).fetchall() - - trait_ids = [result[0] for result in results] - trait_names = [result[1] for result in results] - dataset_id = results[0][2] - return trait_ids, trait_names, data_type, dataset_id + else: + data_type = "ProbeSet" + cursor.execute( + "SELECT ProbeSetXRef.ProbeSetId, " + "ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId " + "FROM ProbeSet, ProbeSetXRef, " + "ProbeSetFreeze WHERE " + "ProbeSet.Id = ProbeSetXRef.ProbeSetId AND " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + f"AND ProbeSetFreeze.Name = %s {limit_string}", + (dataset_name,) + ) + results = cursor.fetchall() + trait_ids = [result[0] for result in results] + trait_names = [result[1] for result in results] + dataset_id = results[0][2] + return trait_ids, trait_names, data_type, dataset_id def get_samplelist(dataset_name): - group_id = get_group_id_from_dataset(dataset_name) - - query = """ - SELECT Strain.Name - FROM Strain, StrainXRef - WHERE StrainXRef.StrainId = Strain.Id AND - StrainXRef.InbredSetId = {} - """.format(group_id) - - results = g.db.execute(query).fetchall() - - samplelist = [result[0] for result in results] - - return samplelist + with database_connection() as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name FROM Strain, StrainXRef " + "WHERE StrainXRef.StrainId = Strain.Id AND " + "StrainXRef.InbredSetId = %s", + (dataset_name,) + ) + # sample list + return [result[0] for result in cursor.fetchall()] def get_group_id_from_dataset(dataset_name): - if "Publish" in dataset_name: - query = """ - SELECT - InbredSet.Id - FROM - InbredSet, PublishFreeze - WHERE - PublishFreeze.InbredSetId = InbredSet.Id AND - PublishFreeze.Name = "{}" - """.format(dataset_name) - elif "Geno" in dataset_name: - query = """ - SELECT - InbredSet.Id - FROM - InbredSet, GenoFreeze - WHERE - GenoFreeze.InbredSetId = InbredSet.Id AND - GenoFreeze.Name = "{}" - """.format(dataset_name) - else: - query = """ - SELECT - InbredSet.Id - FROM - InbredSet, ProbeSetFreeze, ProbeFreeze - WHERE - ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND - ProbeSetFreeze.Name = "{}" - """.format(dataset_name) - - result = g.db.execute(query).fetchone() - - if len(result) > 0: - return result[0] - else: + result = () + with database_connection() as conn, conn.cursor() as cursor: + if "Publish" in dataset_name: + cursor.execute( + "SELECT InbredSet.Id FROM " + "InbredSet, PublishFreeze " + "WHERE PublishFreeze.InbredSetId = InbredSet.Id " + "AND PublishFreeze.Name = %s", + (dataset_name,) + ) + elif "Geno" in dataset_name: + cursor.execute( + "SELECT InbredSet.Id FROM " + "InbredSet, GenoFreeze WHERE " + "GenoFreeze.InbredSetId = InbredSet.Id " + "AND GenoFreeze.Name = %s", + (dataset_name,) + ) + else: + cursor.execute( + "SELECT InbredSet.Id FROM " + "InbredSet, ProbeSetFreeze, " + "ProbeFreeze WHERE " + "ProbeFreeze.InbredSetId = InbredSet.Id " + "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " + "AND ProbeSetFreeze.Name = %s", + (dataset_name,) + ) + if result := cursor.fetchone(): + return result[0] return None |