aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorMunyoki Kilyungi2022-09-05 10:21:06 +0300
committerBonfaceKilz2022-09-08 14:26:19 +0300
commit8aef689bb1ab735ad4605af8b8b282672f92d2cf (patch)
tree92506b8120f4223bfe92784ce7bd83d70ecae20b /wqflask
parentc341c4565126c47c78f220871d9e334dbe5aec73 (diff)
downloadgenenetwork2-8aef689bb1ab735ad4605af8b8b282672f92d2cf.tar.gz
Replace g.db with database_connection() context manager
* wqflask/base/data_set.py (DatasetType.set_dataset_key): Replace g.db with "database_connection()". (DatasetGroup.get_mapping_methods): Ditto. (DataSet.get_accession_id): Ditto. (DataSet.retrieve_other_names): Rework "if ... else" logic to get rid of "query" and "_vars" variables. Also, nest "try ... except" block within "database_connection". (DataSet.chunk_dataset): Replace g.db with "database_connection()". (DataSet.get_probeset_data): Ditto. (DataSet.get_trait_data): Ditto. (PhenotypeDataSet.get_trait_info): Ditto. (PhenotypeDataSet.retrieve_sample_data): Ditto. (GenotypeDataSet.retrieve_sample_data): Ditto. (MrnaAssayDataSet.get_trait_info): Ditto. (MrnaAssayDataSet.retrieve_sample_data): Ditto. (MrnaAssayDataSet.retrieve_genes): Ditto. (TempDataSet): Ditto. (geno_mrna_confidentiality): Ditto. (query_table_timestamp): Ditto. * wqflask/wqflask/api/router.py: Replace "flask.g" with database_connection import. (get_species_list): Replace g.db with "database_connection()". (get_species_info): Ditto. (get_groups_list): Ditto. (get_group_info): Ditto. (get_datasets_for_group): Ditto. (get_dataset_info): Ditto. (fetch_traits): Ditto. (all_sample_data): Ditto. (trait_sample_data): Ditto. (get_trait_info): Ditto. (get_dataset_trait_ids): Ditto. (get_samplelist): Ditto. (get_group_id_from_dataset): Ditto. (get_group_id): Ditto.
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/base/data_set.py711
-rw-r--r--wqflask/wqflask/api/router.py902
2 files changed, 810 insertions, 803 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 2f4c1154..ade38131 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -141,18 +141,18 @@ class DatasetType:
sql_query_mapping = {
'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM "
"ProbeSetFreeze WHERE "
- "ProbeSetFreeze.Name = \"%s\" "),
+ "ProbeSetFreeze.Name = %s "),
'pheno': ("SELECT InfoFiles.GN_AccesionId "
"FROM InfoFiles, PublishFreeze, InbredSet "
- "WHERE InbredSet.Name = '%s' AND "
+ "WHERE InbredSet.Name = %s AND "
"PublishFreeze.InbredSetId = InbredSet.Id AND "
"InfoFiles.InfoPageName = PublishFreeze.Name"),
'other_pheno': ("SELECT PublishFreeze.Name "
"FROM PublishFreeze, InbredSet "
- "WHERE InbredSet.Name = '%s' AND "
+ "WHERE InbredSet.Name = %s AND "
"PublishFreeze.InbredSetId = InbredSet.Id"),
'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE "
- "GenoFreeze.Name = \"%s\" ")
+ "GenoFreeze.Name = %s ")
}
dataset_name_mapping = {
@@ -166,13 +166,14 @@ class DatasetType:
if t in ['pheno', 'other_pheno']:
group_name = name.replace("Publish", "")
- results = g.db.execute(sql_query_mapping[t] % group_name).fetchone()
- if results:
- self.datasets[name] = dataset_name_mapping[t]
- self.redis_instance.set(
- "dataset_structure", json.dumps(self.datasets))
- return True
- return None
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(sql_query_mapping[t], (group_name,))
+ if cursor.fetchone():
+ self.datasets[name] = dataset_name_mapping[t]
+ self.redis_instance.set(
+ "dataset_structure", json.dumps(self.datasets))
+ return True
+
def __call__(self, name):
if name not in self.datasets:
@@ -348,9 +349,13 @@ class DatasetGroup:
self.genofile = None
def get_mapping_methods(self):
-
- mapping_id = g.db.execute(
- "select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0]
+ mapping_id = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT MappingMethodId FROM "
+ "InbredSet WHERE Name= %s"
+ (self.name,))
+ mapping_id = cursor.fetchone()[0]
if mapping_id == "1":
mapping_names = ["GEMMA", "QTLReaper", "R/qtl"]
elif mapping_id == "2":
@@ -589,29 +594,38 @@ class DataSet:
}
def get_accession_id(self):
- if self.type == "Publish":
- results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
- InbredSet.Name = %s and
- PublishFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = PublishFreeze.Name and
- PublishFreeze.public > 0 and
- PublishFreeze.confidentiality < 1 order by
- PublishFreeze.CreateTime desc""", (self.group.name)).fetchone()
- elif self.type == "Geno":
- results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
- InbredSet.Name = %s and
- GenoFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = GenoFreeze.ShortName and
- GenoFreeze.public > 0 and
- GenoFreeze.confidentiality < 1 order by
- GenoFreeze.CreateTime desc""", (self.group.name)).fetchone()
- else:
- results = None
+ results = None
+ with database_connection() as conn, conn.cursor() as cursor:
+ if self.type == "Publish":
+ cursor.execute(
+ "SELECT InfoFiles.GN_AccesionId FROM "
+ "InfoFiles, PublishFreeze, InbredSet "
+ "WHERE InbredSet.Name = %s AND "
+ "PublishFreeze.InbredSetId = InbredSet.Id "
+ "AND InfoFiles.InfoPageName = PublishFreeze.Name "
+ "AND PublishFreeze.public > 0 AND "
+ "PublishFreeze.confidentiality < 1 "
+ "ORDER BY PublishFreeze.CreateTime DESC",
+ (self.group.name,)
+ )
+ results = cursor.fetchone()
+ elif self.type == "Geno":
+ cursor.execute(
+ "SELECT InfoFiles.GN_AccesionId FROM "
+ "InfoFiles, GenoFreeze, InbredSet "
+ "WHERE InbredSet.Name = %s AND "
+ "GenoFreeze.InbredSetId = InbredSet.Id "
+ "AND InfoFiles.InfoPageName = GenoFreeze.ShortName "
+ "AND GenoFreeze.public > 0 AND "
+ "GenoFreeze.confidentiality < 1 "
+ "ORDER BY GenoFreeze.CreateTime DESC",
+ (self.group.name,)
+ )
+ results = cursor.fetchone()
- if results != None:
+ if results:
return str(results[0])
- else:
- return "None"
+ return "None"
def retrieve_other_names(self):
"""This method fetches the the dataset names in search_result.
@@ -624,52 +638,49 @@ class DataSet:
all is passed.
"""
-
- try:
- query = ""
- _vars = None
- query_args = (self.name, self.name, self.name)
- if self.type == "ProbeSet":
- query = (
- "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, "
- "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, "
- "ProbeSetFreeze.DataScale, Tissue.Name "
- "FROM ProbeSetFreeze, ProbeFreeze, Tissue "
- "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
- "AND ProbeFreeze.TissueId = Tissue.Id "
- "AND (ProbeSetFreeze.Name = %s OR ProbeSetFreeze.FullName = %s OR ProbeSetFreeze.ShortName = %s)")
- else:
- query = (
- "SELECT Id, Name, FullName, ShortName "
- f"FROM {self.type}Freeze "
- "WHERE (Name = %s OR FullName = %s OR ShortName = %s)")
- self.tissue = "N/A"
- with database_connection() as conn, conn.cursor() as cursor:
- cursor.execute(query, query_args)
- _vars = cursor.fetchone()
+ with database_connection() as conn, conn.cursor() as cursor:
+ try:
if self.type == "ProbeSet":
+ cursor.execute(
+ "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, "
+ "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, "
+ "ProbeSetFreeze.DataScale, Tissue.Name "
+ "FROM ProbeSetFreeze, ProbeFreeze, Tissue "
+ "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
+ "AND ProbeFreeze.TissueId = Tissue.Id "
+ "AND (ProbeSetFreeze.Name = %s OR "
+ "ProbeSetFreeze.FullName = %s "
+ "OR ProbeSetFreeze.ShortName = %s)",
+ (self.name,)*3)
(self.id, self.name, self.fullname, self.shortname,
- self.data_scale, self.tissue) = _vars
+ self.data_scale, self.tissue) = cursor.fetchone()
else:
- self.id, self.name, self.fullname, self.shortname = _vars
- except TypeError:
- pass
+ cursor.execute(
+ "SELECT Id, Name, FullName, ShortName "
+ f"FROM {self.type}Freeze "
+ "WHERE (Name = %s OR FullName = "
+ "%s OR ShortName = %s)",
+ (self.name,)*3)
+ (self.id, self.name, self.fullname, self.shortname,
+ self.data_scale, self.tissue) = cursor.fetchone()
+ self.tissue = "N/A"
+ except TypeError:
+ pass
def chunk_dataset(self, dataset, n):
results = {}
-
- query = """
- SELECT ProbeSetXRef.DataId,ProbeSet.Name
- FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze
- WHERE ProbeSetFreeze.Name = '{}' AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetXRef.ProbeSetId = ProbeSet.Id
- """.format(self.name)
-
- # should cache this
-
- traits_name_dict = dict(g.db.execute(query).fetchall())
+ traits_name_dict = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT ProbeSetXRef.DataId,ProbeSet.Name "
+ "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze "
+ "WHERE ProbeSetFreeze.Name = %s AND "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id",
+ (self.name,))
+ # should cache this
+ traits_name_dict = dict(cursor.fetchall())
for i in range(0, len(dataset), n):
matrix = list(dataset[i:i + n])
@@ -691,31 +702,35 @@ class DataSet:
if self.group.parlist != None and self.group.f1list != None:
if (self.group.parlist + self.group.f1list) in self.samplelist:
self.samplelist += self.group.parlist + self.group.f1list
-
- query = """
- SELECT Strain.Name, Strain.Id FROM Strain, Species
- WHERE Strain.Name IN {}
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- results = dict(g.db.execute(query).fetchall())
- sample_ids = [results[item] for item in self.samplelist]
-
- sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
- results.items(), key=lambda item: item[1])]
-
- query = """SELECT * from ProbeSetData
- where StrainID in {}
- and id in (SELECT ProbeSetXRef.DataId
- FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
- WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Name = '{}'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name)
-
- query_results = list(g.db.execute(query).fetchall())
- data_results = self.chunk_dataset(query_results, len(sample_ids))
- self.samplelist = sorted_samplelist
- self.trait_data = data_results
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, Strain.Id FROM "
+ "Strain, Species WHERE Strain.Name IN "
+ f"{create_in_clause(self.samplelist)} "
+ "AND Strain.SpeciesId=Species.Id AND "
+ "Species.name = %s", (self.group.species,)
+ )
+ results = dict(cursor.fetchall())
+ sample_ids = [results[item] for item in self.samplelist]
+
+ sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
+ results.items(), key=lambda item: item[1])]
+
+ cursor.execute(
+ "SELECT * from ProbeSetData WHERE StrainID IN "
+ f"{create_in_clause(sample_ids)} AND id IN "
+ "(SELECT ProbeSetXRef.DataId FROM "
+ "(ProbeSet, ProbeSetXRef, ProbeSetFreeze) "
+ "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetFreeze.Name = %s AND "
+ "ProbeSet.Id = ProbeSetXRef.ProbeSetId)",
+ (self.name,)
+ )
+
+ query_results = list(cursor.fetchall())
+ data_results = self.chunk_dataset(query_results, len(sample_ids))
+ self.samplelist = sorted_samplelist
+ self.trait_data = data_results
def get_trait_data(self, sample_list=None):
if sample_list:
@@ -727,81 +742,84 @@ class DataSet:
if (self.group.parlist + self.group.f1list) in self.samplelist:
self.samplelist += self.group.parlist + self.group.f1list
- query = """
- SELECT Strain.Name, Strain.Id FROM Strain, Species
- WHERE Strain.Name IN {}
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- results = dict(g.db.execute(query).fetchall())
- sample_ids = [results.get(item)
- for item in self.samplelist if item is not None]
-
- # MySQL limits the number of tables that can be used in a join to 61,
- # so we break the sample ids into smaller chunks
- # Postgres doesn't have that limit, so we can get rid of this after we transition
- chunk_size = 50
- number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
-
- cached_results = fetch_cached_results(self.name, self.type, self.samplelist)
- if cached_results is None:
- trait_sample_data = []
- for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
- if self.type == "Publish":
- dataset_type = "Phenotype"
- else:
- dataset_type = self.type
- temp = ['T%s.value' % item for item in sample_ids_step]
- if self.type == "Publish":
- query = "SELECT {}XRef.Id,".format(escape(self.type))
- else:
- query = "SELECT {}.Name,".format(escape(dataset_type))
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, Strain.Id FROM Strain, Species "
+ f"WHERE Strain.Name IN {create_in_clause(self.samplelist)} "
+ "AND Strain.SpeciesId=Species.Id "
+ "AND Species.name = %s",
+ (self.group.species,)
+ )
+ results = dict(cursor.fetchall())
+ sample_ids = [results.get(item)
+ for item in self.samplelist if item is not None]
+
+ # MySQL limits the number of tables that can be used in a join to 61,
+ # so we break the sample ids into smaller chunks
+ # Postgres doesn't have that limit, so we can get rid of this after we transition
+ chunk_size = 50
+ number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
+
+ cached_results = fetch_cached_results(self.name, self.type, self.samplelist)
+
+ if cached_results is None:
+ trait_sample_data = []
+ for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
+ if self.type == "Publish":
+ dataset_type = "Phenotype"
+ else:
+ dataset_type = self.type
+ temp = ['T%s.value' % item for item in sample_ids_step]
+ if self.type == "Publish":
+ query = "SELECT {}XRef.Id,".format(escape(self.type))
+ else:
+ query = "SELECT {}.Name,".format(escape(dataset_type))
+ data_start_pos = 1
+ query += ', '.join(temp)
+ query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
+ self.type,
+ self.type))
+
+ for item in sample_ids_step:
+ query += """
+ left join {}Data as T{} on T{}.Id = {}XRef.DataId
+ and T{}.StrainId={}\n
+ """.format(*mescape(self.type, item, item, self.type, item, item))
+
+ if self.type == "Publish":
+ query += """
+ WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
+ and {}Freeze.Name = '{}'
+ and {}.Id = {}XRef.{}Id
+ order by {}.Id
+ """.format(*mescape(self.type, self.type, self.type, self.name,
+ dataset_type, self.type, dataset_type, dataset_type))
+ else:
+ query += """
+ WHERE {}XRef.{}FreezeId = {}Freeze.Id
+ and {}Freeze.Name = '{}'
+ and {}.Id = {}XRef.{}Id
+ order by {}.Id
+ """.format(*mescape(self.type, self.type, self.type, self.type,
+ self.name, dataset_type, self.type, self.type, dataset_type))
+ cursor.execute(query)
+ results = cursor.fetchall()
+ trait_sample_data.append([list(result) for result in results])
+
+ trait_count = len(trait_sample_data[0])
+ self.trait_data = collections.defaultdict(list)
+
data_start_pos = 1
- query += ', '.join(temp)
- query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
- self.type,
- self.type))
-
- for item in sample_ids_step:
- query += """
- left join {}Data as T{} on T{}.Id = {}XRef.DataId
- and T{}.StrainId={}\n
- """.format(*mescape(self.type, item, item, self.type, item, item))
-
- if self.type == "Publish":
- query += """
- WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
- and {}Freeze.Name = '{}'
- and {}.Id = {}XRef.{}Id
- order by {}.Id
- """.format(*mescape(self.type, self.type, self.type, self.name,
- dataset_type, self.type, dataset_type, dataset_type))
- else:
- query += """
- WHERE {}XRef.{}FreezeId = {}Freeze.Id
- and {}Freeze.Name = '{}'
- and {}.Id = {}XRef.{}Id
- order by {}.Id
- """.format(*mescape(self.type, self.type, self.type, self.type,
- self.name, dataset_type, self.type, self.type, dataset_type))
-
- results = g.db.execute(query).fetchall()
- trait_sample_data.append([list(result) for result in results])
-
- trait_count = len(trait_sample_data[0])
- self.trait_data = collections.defaultdict(list)
-
- data_start_pos = 1
- for trait_counter in range(trait_count):
- trait_name = trait_sample_data[0][trait_counter][0]
- for chunk_counter in range(int(number_chunks)):
- self.trait_data[trait_name] += (
- trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
-
- cache_dataset_results(
- self.name, self.type, self.samplelist, self.trait_data)
- else:
- self.trait_data = cached_results
+ for trait_counter in range(trait_count):
+ trait_name = trait_sample_data[0][trait_counter][0]
+ for chunk_counter in range(int(number_chunks)):
+ self.trait_data[trait_name] += (
+ trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
+
+ cache_dataset_results(
+ self.name, self.type, self.samplelist, self.trait_data)
+ else:
+ self.trait_data = cached_results
class PhenotypeDataSet(DataSet):
@@ -897,43 +915,41 @@ SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType, InbredSet.InbredSetC
this_trait.LRS_location_repr = "N/A"
if this_trait.lrs:
- query = """
- select Geno.Chr, Geno.Mb from Geno, Species
- where Species.Name = '%s' and
- Geno.Name = '%s' and
- Geno.SpeciesId = Species.Id
- """ % (species, this_trait.locus)
- result = g.db.execute(query).fetchone()
-
- if result:
- if result[0] and result[1]:
- LRS_Chr = result[0]
- LRS_Mb = result[1]
-
- this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
- this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
- LRS_Chr, float(LRS_Mb))
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Geno.Chr, Geno.Mb FROM "
+ "Geno, Species WHERE "
+ "Species.Name = %s AND "
+ "Geno.Name = %s AND "
+ "Geno.SpeciesId = Species.Id",
+ (species, this_trait.locus,)
+ )
+ if result := cursor.fetchone():
+ if result[0] and result[1]:
+ LRS_Chr, LRS_Mb = result[0], result[1]
+ this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
+ this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
+ LRS_Chr, float(LRS_Mb))
def retrieve_sample_data(self, trait):
- query = """
- SELECT
- Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2
- FROM
- (PublishData, Strain, PublishXRef, PublishFreeze)
- left join PublishSE on
- (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
- left join NStrain on
- (NStrain.DataId = PublishData.Id AND
- NStrain.StrainId = PublishData.StrainId)
- WHERE
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
- PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
- results = g.db.execute(query, (trait, self.id)).fetchall()
- return results
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, PublishData.value, "
+ "PublishSE.error, NStrain.count, "
+ "Strain.Name2 FROM (PublishData, Strain, "
+ "PublishXRef, PublishFreeze) LEFT JOIN "
+ "PublishSE ON "
+ "(PublishSE.DataId = PublishData.Id "
+ "AND PublishSE.StrainId = PublishData.StrainId) "
+ "LEFT JOIN NStrain ON "
+ "(NStrain.DataId = PublishData.Id AND "
+ "NStrain.StrainId = PublishData.StrainId) "
+ "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
+ "AND PublishData.Id = PublishXRef.DataId AND "
+ "PublishXRef.Id = %s AND PublishFreeze.Id = %s "
+ "AND PublishData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name", (trait, self.id))
+ return cursor.fetchall()
class GenotypeDataSet(DataSet):
@@ -976,25 +992,25 @@ GenoFreeze.Name = %s"""
this_trait.chr, float(this_trait.mb))
def retrieve_sample_data(self, trait):
- query = """
- SELECT
- Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
- FROM
- (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
- left join GenoSE on
- (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
- WHERE
- Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
- GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = %s AND
- GenoXRef.DataId = GenoData.Id AND
- GenoData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
- results = g.db.execute(query,
- (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
- trait, self.name)).fetchall()
+ results = []
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, GenoData.value, "
+ "GenoSE.error, 'N/A', Strain.Name2 "
+ "FROM (GenoData, GenoFreeze, Strain, Geno, "
+ "GenoXRef) LEFT JOIN GenoSE ON "
+ "(GenoSE.DataId = GenoData.Id AND "
+ "GenoSE.StrainId = GenoData.StrainId) "
+ "WHERE Geno.SpeciesId = %s AND "
+ "Geno.Name = %s AND GenoXRef.GenoId = Geno.Id "
+ "AND GenoXRef.GenoFreezeId = GenoFreeze.Id "
+ "AND GenoFreeze.Name = %s AND "
+ "GenoXRef.DataId = GenoData.Id "
+ "AND GenoData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name",
+ (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
+ trait, self.name,))
+ results = list(cursor.fetchall())
if self.group.name in webqtlUtil.ParInfo:
f1_1, f1_2, ref, nonref = webqtlUtil.ParInfo[self.group.name]
@@ -1073,114 +1089,113 @@ ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND ProbeSetFreeze.Name = %s"""
# Note: setting trait_list to [] is probably not a great idea.
if not trait_list:
trait_list = []
+ with database_connection() as conn, conn.cursor() as cursor:
+ for this_trait in trait_list:
- for this_trait in trait_list:
-
- if not this_trait.haveinfo:
- this_trait.retrieveInfo(QTL=1)
-
- if not this_trait.symbol:
- this_trait.symbol = "N/A"
-
- # XZ, 12/08/2008: description
- # XZ, 06/05/2009: Rob asked to add probe target description
- description_string = str(
- str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
- target_string = str(
- str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
-
- if len(description_string) > 1 and description_string != 'None':
- description_display = description_string
- else:
- description_display = this_trait.symbol
-
- if (len(description_display) > 1 and description_display != 'N/A'
- and len(target_string) > 1 and target_string != 'None'):
- description_display = description_display + '; ' + target_string.strip()
-
- # Save it for the jinja2 template
- this_trait.description_display = description_display
-
- if this_trait.chr and this_trait.mb:
- this_trait.location_repr = 'Chr%s: %.6f' % (
- this_trait.chr, float(this_trait.mb))
-
- # Get mean expression value
- query = (
- """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
- where ProbeSetXRef.ProbeSetFreezeId = %s and
- ProbeSet.Id = ProbeSetXRef.ProbeSetId and
- ProbeSet.Name = '%s'
- """ % (escape(str(this_trait.dataset.id)),
- escape(this_trait.name)))
- result = g.db.execute(query).fetchone()
+ if not this_trait.haveinfo:
+ this_trait.retrieveInfo(QTL=1)
- mean = result[0] if result else 0
+ if not this_trait.symbol:
+ this_trait.symbol = "N/A"
- if mean:
- this_trait.mean = "%2.3f" % mean
+ # XZ, 12/08/2008: description
+ # XZ, 06/05/2009: Rob asked to add probe target description
+ description_string = str(
+ str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
+ target_string = str(
+ str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
- # LRS and its location
- this_trait.LRS_score_repr = 'N/A'
- this_trait.LRS_location_repr = 'N/A'
-
- # Max LRS and its Locus location
- if this_trait.lrs and this_trait.locus:
- query = """
- select Geno.Chr, Geno.Mb from Geno, Species
- where Species.Name = '{}' and
- Geno.Name = '{}' and
- Geno.SpeciesId = Species.Id
- """.format(species, this_trait.locus)
- result = g.db.execute(query).fetchone()
-
- if result:
- lrs_chr, lrs_mb = result
- this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
- this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
- lrs_chr, float(lrs_mb))
+ if len(description_string) > 1 and description_string != 'None':
+ description_display = description_string
+ else:
+ description_display = this_trait.symbol
+
+ if (len(description_display) > 1 and description_display != 'N/A'
+ and len(target_string) > 1 and target_string != 'None'):
+ description_display = description_display + '; ' + target_string.strip()
+
+ # Save it for the jinja2 template
+ this_trait.description_display = description_display
+
+ if this_trait.chr and this_trait.mb:
+ this_trait.location_repr = 'Chr%s: %.6f' % (
+ this_trait.chr, float(this_trait.mb))
+
+ # Get mean expression value
+ cursor.execute(
+ "SELECT ProbeSetXRef.mean FROM "
+ "ProbeSetXRef, ProbeSet WHERE "
+ "ProbeSetXRef.ProbeSetFreezeId = %s "
+ "AND ProbeSet.Id = ProbeSetXRef.ProbeSetId "
+ "AND ProbeSet.Name = %s",
+ (str(this_trait.dataset.id), this_trait.name,)
+ )
+ result = cursor.fetchone()
+
+ mean = result[0] if result else 0
+
+ if mean:
+ this_trait.mean = "%2.3f" % mean
+
+ # LRS and its location
+ this_trait.LRS_score_repr = 'N/A'
+ this_trait.LRS_location_repr = 'N/A'
+
+ # Max LRS and its Locus location
+ if this_trait.lrs and this_trait.locus:
+ cursor.execute(
+ "SELECT Geno.Chr, Geno.Mb FROM "
+ "Geno, Species WHERE "
+ "Species.Name = %s AND "
+ "Geno.Name = %s AND "
+ "Geno.SpeciesId = Species.Id",
+ (species, this_trait.locus,)
+ )
+ if result := cursor.fetchone():
+ lrs_chr, lrs_mb = result
+ this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
+ this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
+ lrs_chr, float(lrs_mb))
return trait_list
def retrieve_sample_data(self, trait):
- query = """
- SELECT
- Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
- FROM
- (ProbeSetData, ProbeSetFreeze,
- Strain, ProbeSet, ProbeSetXRef)
- left join ProbeSetSE on
- (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
- left join NStrain on
- (NStrain.DataId = ProbeSetData.Id AND
- NStrain.StrainId = ProbeSetData.StrainId)
- WHERE
- ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = '%s' AND
- ProbeSetXRef.DataId = ProbeSetData.Id AND
- ProbeSetData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """ % (escape(trait), escape(self.name))
- results = g.db.execute(query).fetchall()
- return results
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, ProbeSetData.value, "
+ "ProbeSetSE.error, NStrain.count, "
+ "Strain.Name2 FROM (ProbeSetData, "
+ "ProbeSetFreeze, Strain, ProbeSet, "
+ "ProbeSetXRef) LEFT JOIN ProbeSetSE ON "
+ "(ProbeSetSE.DataId = ProbeSetData.Id AND "
+ "ProbeSetSE.StrainId = ProbeSetData.StrainId) "
+ "LEFT JOIN NStrain ON "
+ "(NStrain.DataId = ProbeSetData.Id AND "
+ "NStrain.StrainId = ProbeSetData.StrainId) "
+ "WHERE ProbeSet.Name = %s AND "
+ "ProbeSetXRef.ProbeSetId = ProbeSet.Id "
+ "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetFreeze.Name = %s AND "
+ "ProbeSetXRef.DataId = ProbeSetData.Id "
+ "AND ProbeSetData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name",
+ (trait, self.name)
+ )
+ return cursor.fetchall()
def retrieve_genes(self, column_name):
- query = """
- select ProbeSet.Name, ProbeSet.%s
- from ProbeSet,ProbeSetXRef
- where ProbeSetXRef.ProbeSetFreezeId = %s and
- ProbeSetXRef.ProbeSetId=ProbeSet.Id;
- """ % (column_name, escape(str(self.id)))
- results = g.db.execute(query).fetchall()
-
- return dict(results)
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ f"SELECT ProbeSet.Name, ProbeSet.{column_name} "
+ "FROM ProbeSet,ProbeSetXRef WHERE "
+ "ProbeSetXRef.ProbeSetFreezeId = %s "
+ "AND ProbeSetXRef.ProbeSetId=ProbeSet.Id",
+ (str(self.id),))
+ return dict(cursor.fetchall())
class TempDataSet(DataSet):
- '''Temporary user-generated data set'''
-
+ """Temporary user-generated data set"""
DS_NAME_MAP['Temp'] = 'TempDataSet'
def setup(self):
@@ -1202,30 +1217,15 @@ class TempDataSet(DataSet):
def geno_mrna_confidentiality(ob):
- dataset_table = ob.type + "Freeze"
-
- query = '''SELECT Id, Name, FullName, confidentiality,
- AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
- result = g.db.execute(query)
-
- (dataset_id,
- name,
- full_name,
- confidential,
- authorized_users) = result.fetchall()[0]
-
- if confidential:
- return True
- result = g.db.execute(query)
-
- (dataset_id,
- name,
- full_name,
- confidential,
- authorized_users) = result.fetchall()[0]
-
- if confidential:
- return True
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT confidentiality, "
+ "AuthorisedUsers FROM %s WHERE Name = %s",
+ (f"{ob.type}Freeze", ob.name,)
+ )
+ result = cursor.fetchall()[0]
+ if result:
+ return True
def parse_db_url():
@@ -1239,16 +1239,15 @@ def query_table_timestamp(dataset_type: str):
"""function to query the update timestamp of a given dataset_type"""
# computation data and actions
-
- fetch_db_name = parse_db_url()
- query_update_time = f"""
- SELECT UPDATE_TIME FROM information_schema.tables
- WHERE TABLE_SCHEMA = '{fetch_db_name[-1]}'
- AND TABLE_NAME = '{dataset_type}Data'
- """
-
- date_time_obj = g.db.execute(query_update_time).fetchone()[0]
- return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
+ with database_connection() as conn, conn.cursor() as cursor:
+ fetch_db_name = parse_db_url()
+ cursor.execute(
+ "SELECT UPDATE_TIME FROM "
+ "information_schema.tables "
+ f"WHERE TABLE_SCHEMA = '{fetch_db_name[-1]}' "
+ f"AND TABLE_NAME = '{dataset_type}Data'")
+ date_time_obj = cursor.fetchone()[0]
+ return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
def generate_hash_file(dataset_name: str, dataset_type: str, dataset_timestamp: str, samplelist: str):
diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py
index ca700083..7335ec4f 100644
--- a/wqflask/wqflask/api/router.py
+++ b/wqflask/wqflask/api/router.py
@@ -12,7 +12,7 @@ from zipfile import ZipFile, ZIP_DEFLATED
import flask
from flask import current_app
-from flask import g
+from wqflask.database import database_connection
from flask import request
from flask import make_response
from flask import send_file
@@ -36,62 +36,72 @@ def hello_world():
@app.route("/api/v_{}/species".format(version))
def get_species_list():
- results = g.db.execute(
- "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;")
- the_species = results.fetchall()
species_list = []
- for species in the_species:
- species_dict = {
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species"
+ )
+ for species in cursor.fetchall():
+ species_dict = {
"Id": species[0],
"Name": species[1],
"FullName": species[2],
"TaxonomyId": species[3]
}
species_list.append(species_dict)
-
return flask.jsonify(species_list)
@app.route("/api/v_{}/species/<path:species_name>".format(version))
@app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version))
def get_species_info(species_name, file_format="json"):
- results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId
- FROM Species
- WHERE (Name="{0}" OR FullName="{0}" OR SpeciesName="{0}");""".format(species_name))
-
- the_species = results.fetchone()
- species_dict = {
- "Id": the_species[0],
- "Name": the_species[1],
- "FullName": the_species[2],
- "TaxonomyId": the_species[3]
- }
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT SpeciesId, Name, FullName, TaxonomyId "
+ "FROM Species WHERE (Name=%s OR FullName=%s "
+ "OR SpeciesName=%s)", ((species_name,)*3))
+ _species = cursor.fetchone()
+ species_dict = {
+ "Id": _species[0],
+ "Name": _species[1],
+ "FullName": _species[2],
+ "TaxonomyId": _species[3]
+ }
- return flask.jsonify(species_dict)
+ return flask.jsonify(species_dict)
@app.route("/api/v_{}/groups".format(version))
@app.route("/api/v_{}/groups/<path:species_name>".format(version))
def get_groups_list(species_name=None):
- if species_name:
- results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName,
- InbredSet.Name, InbredSet.FullName, InbredSet.public,
- IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None")
- FROM InbredSet, Species
- WHERE InbredSet.SpeciesId = Species.Id AND
- (Species.Name = "{0}" OR
- Species.FullName="{0}" OR
- Species.SpeciesName="{0}");""".format(species_name))
- else:
- results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName,
- InbredSet.Name, InbredSet.FullName, InbredSet.public,
- IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None")
- FROM InbredSet;""")
-
- the_groups = results.fetchall()
- if the_groups:
+ _groups = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ if species_name:
+ cursor.execute(
+ "SELECT InbredSet.InbredSetId, "
+ "InbredSet.SpeciesId, InbredSet.InbredSetName, "
+ "InbredSet.Name, InbredSet.FullName, "
+ "InbredSet.public, IFNULL(InbredSet.MappingMethodId, "
+ "'None'), IFNULL(InbredSet.GeneticType, 'None') "
+ "FROM InbredSet, Species WHERE "
+ "InbredSet.SpeciesId = Species.Id AND "
+ "(Species.Name = %s OR Species.FullName=%s "
+ "OR Species.SpeciesName=%s)", ((species_name,) * 3)
+ )
+ else:
+ cursor.execute(
+ "SELECT InbredSet.InbredSetId, "
+ "InbredSet.SpeciesId, InbredSet.InbredSetName, "
+ "InbredSet.Name, InbredSet.FullName, "
+ "InbredSet.public, IFNULL(InbredSet.MappingMethodId, "
+ "'None'), IFNULL(InbredSet.GeneticType, 'None') "
+ "FROM InbredSet"
+ )
+ _groups = cursor.fetchall()
+
+ if _groups:
groups_list = []
- for group in the_groups:
+ for group in _groups:
group_dict = {
"Id": group[0],
"SpeciesId": group[1],
@@ -103,10 +113,8 @@ def get_groups_list(species_name=None):
"GeneticType": group[7]
}
groups_list.append(group_dict)
-
return flask.jsonify(groups_list)
- else:
- return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+ return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/group/<path:group_name>".format(version))
@@ -114,26 +122,37 @@ def get_groups_list(species_name=None):
@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version))
@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version))
def get_group_info(group_name, species_name=None, file_format="json"):
- if species_name:
- results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName,
- InbredSet.Name, InbredSet.FullName, InbredSet.public,
- IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None")
- FROM InbredSet, Species
- WHERE InbredSet.SpeciesId = Species.Id AND
- (InbredSet.InbredSetName = "{0}" OR
- InbredSet.Name = "{0}" OR
- InbredSet.FullName = "{0}") AND
- (Species.Name = "{1}" OR
- Species.FullName="{1}" OR
- Species.SpeciesName="{1}");""".format(group_name, species_name))
- else:
- results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName,
- InbredSet.Name, InbredSet.FullName, InbredSet.public,
- IFNULL(InbredSet.MappingMethodId, "None"), IFNULL(InbredSet.GeneticType, "None")
- FROM InbredSet
- WHERE (InbredSet.InbredSetName = "{0}" OR
- InbredSet.Name = "{0}" OR
- InbredSet.FullName = "{0}");""".format(group_name))
+ group = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ if species_name:
+ cursor.execute(
+ "SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, "
+ "InbredSet.InbredSetName, InbredSet.Name, "
+ "InbredSet.FullName, InbredSet.public, "
+ "IFNULL(InbredSet.MappingMethodId, 'None'), "
+ "IFNULL(InbredSet.GeneticType, 'None') "
+ "FROM InbredSet, Species WHERE "
+ "InbredSet.SpeciesId = Species.Id "
+ "AND (InbredSet.InbredSetName = %s OR "
+ "InbredSet.Name = %s OR InbredSet.FullName = %s) "
+ "AND (Species.Name = %s OR "
+ "Species.FullName = %s OR Species.SpeciesName = %s)",
+ (*(group_name,)*3, *(species_name,)*3)
+ )
+ else:
+ cursor.execute(
+ "SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, "
+ "InbredSet.InbredSetName, InbredSet.Name, "
+ "InbredSet.FullName, InbredSet.public, "
+ "IFNULL(InbredSet.MappingMethodId, 'None'), "
+ "IFNULL(InbredSet.GeneticType, 'None') "
+ "FROM InbredSet WHERE "
+ "(InbredSet.InbredSetName = %s OR "
+ "InbredSet.Name = %s OR "
+ "InbredSet.FullName = %s)",
+ ((group_name,)*3)
+ )
+ results = cursor.fetchone()
group = results.fetchone()
if group:
@@ -156,36 +175,48 @@ def get_group_info(group_name, species_name=None, file_format="json"):
@app.route("/api/v_{}/datasets/<path:group_name>".format(version))
@app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version))
def get_datasets_for_group(group_name, species_name=None):
- if species_name:
- results = g.db.execute("""
- SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID,
- ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName,
- ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public,
- ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale
- FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species
- WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
- ProbeFreeze.InbredSetId = InbredSet.Id AND
- (InbredSet.Name = "{0}" OR InbredSet.InbredSetName = "{0}" OR InbredSet.FullName = "{0}") AND
- InbredSet.SpeciesId = Species.Id AND
- (Species.SpeciesName = "{1}" OR Species.MenuName = "{1}" OR Species.FullName = "{1}");
- """.format(group_name, species_name))
- else:
- results = g.db.execute("""
- SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID,
- ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName,
- ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public,
- ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale
- FROM ProbeSetFreeze, ProbeFreeze, InbredSet
- WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
- ProbeFreeze.InbredSetId = InbredSet.Id AND
- (InbredSet.Name = "{0}" OR InbredSet.InbredSetName = "{0}" OR InbredSet.FullName = "{0}");
- """.format(group_name))
-
- the_datasets = results.fetchall()
-
- if the_datasets:
+ _datasets = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ if species_name:
+ cursor.execute(
+ "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, "
+ "ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, "
+ "ProbeSetFreeze.Name2, "
+ "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, "
+ "ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, "
+ "ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale "
+ "FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species "
+ "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
+ "AND ProbeFreeze.InbredSetId = InbredSet.Id "
+ "AND (InbredSet.Name = %s OR "
+ "InbredSet.InbredSetName = %s OR "
+ "InbredSet.FullName = %s) AND "
+ "InbredSet.SpeciesId = Species.Id AND "
+ "(Species.SpeciesName = %s OR "
+ "Species.MenuName = %s OR Species.FullName = %s);",
+ (*(group_name,)*3, *(species_name)*3)
+ )
+ else:
+ cursor.execute(
+ "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, "
+ "ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, "
+ "ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, "
+ "ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, "
+ "ProbeSetFreeze.public, ProbeSetFreeze.confidentiality, "
+ "ProbeSetFreeze.DataScale FROM ProbeSetFreeze, "
+ "ProbeFreeze, InbredSet WHERE "
+ "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
+ "AND ProbeFreeze.InbredSetId = InbredSet.Id "
+ "AND (InbredSet.Name = %s OR "
+ "InbredSet.InbredSetName = %s OR "
+ "InbredSet.FullName = %s)",
+ ((group_name,) * 3)
+ )
+ _datasets = cursor.fetchall()
+
+ if _datasets:
datasets_list = []
- for dataset in the_datasets:
+ for dataset in _datasets:
dataset_dict = {
"Id": dataset[0],
"ProbeFreezeId": dataset[1],
@@ -213,8 +244,10 @@ def get_datasets_for_group(group_name, species_name=None):
def get_dataset_info(dataset_name, group_name=None, file_format="json"):
# ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets
- datasets_list = [] # ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice
-
+ # ZS: I figure I might as well return a list if there are multiple
+ # matches, though I don"t know if this will actually happen in
+ # practice
+ datasets_list, dataset_dict = [], {}
probeset_query = """
SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName,
ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId,
@@ -237,71 +270,71 @@ def get_dataset_info(dataset_name, group_name=None, file_format="json"):
(ProbeSetFreeze.Name = "{0}" OR ProbeSetFreeze.Name2 = "{0}" OR
ProbeSetFreeze.FullName = "{0}" OR ProbeSetFreeze.ShortName = "{0}")
""".format(dataset_name)
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(f"{probeset_query}{where_statement}")
- probeset_query += where_statement
- probeset_results = g.db.execute(probeset_query)
- dataset = probeset_results.fetchone()
-
- if dataset:
- dataset_dict = {
- "dataset_type": "mRNA expression",
- "id": dataset[0],
- "name": dataset[1],
- "full_name": dataset[2],
- "short_name": dataset[3],
- "data_scale": dataset[4],
- "tissue_id": dataset[5],
- "tissue": dataset[6],
- "public": dataset[7],
- "confidential": dataset[8]
- }
-
- datasets_list.append(dataset_dict)
-
- if group_name:
- pheno_query = """
- SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description,
- Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description,
- Publication.PubMed_ID, Publication.Title, Publication.Year
- FROM PublishXRef, Phenotype, Publication, InbredSet, PublishFreeze
- WHERE PublishXRef.InbredSetId = InbredSet.Id AND
- PublishXRef.PhenotypeId = Phenotype.Id AND
- PublishXRef.PublicationId = Publication.Id AND
- PublishFreeze.InbredSetId = InbredSet.Id AND
- PublishFreeze.public > 0 AND
- PublishFreeze.confidentiality < 1 AND
- InbredSet.Name = "{0}" AND PublishXRef.Id = "{1}"
- """.format(group_name, dataset_name)
-
- pheno_results = g.db.execute(pheno_query)
- dataset = pheno_results.fetchone()
-
- if dataset:
- if dataset[5]:
- dataset_dict = {
- "dataset_type": "phenotype",
- "id": dataset[0],
- "name": dataset[1],
- "description": dataset[2],
- "pubmed_id": dataset[5],
- "title": dataset[6],
- "year": dataset[7]
- }
- elif dataset[4]:
- dataset_dict = {
- "dataset_type": "phenotype",
- "id": dataset[0],
- "name": dataset[3],
- "description": dataset[4]
- }
- else:
- dataset_dict = {
- "dataset_type": "phenotype",
- "id": dataset[0]
- }
+ if dataset := cursor.fetchone():
+ dataset_dict = {
+ "dataset_type": "mRNA expression",
+ "id": dataset[0],
+ "name": dataset[1],
+ "full_name": dataset[2],
+ "short_name": dataset[3],
+ "data_scale": dataset[4],
+ "tissue_id": dataset[5],
+ "tissue": dataset[6],
+ "public": dataset[7],
+ "confidential": dataset[8]
+ }
datasets_list.append(dataset_dict)
+ if group_name:
+ cursor.execute(
+ "SELECT PublishXRef.Id, "
+ "Phenotype.Post_publication_abbreviation, "
+ "Phenotype.Post_publication_description, "
+ "Phenotype.Pre_publication_abbreviation, "
+ "Phenotype.Pre_publication_description, "
+ "Publication.PubMed_ID, Publication.Title, "
+ "Publication.Year FROM PublishXRef, Phenotype, "
+ "Publication, InbredSet, PublishFreeze WHERE "
+ "PublishXRef.InbredSetId = InbredSet.Id "
+ "AND PublishXRef.PhenotypeId = Phenotype.Id "
+ "AND PublishXRef.PublicationId = Publication.Id "
+ "AND PublishFreeze.InbredSetId = InbredSet.Id "
+ "AND PublishFreeze.public > 0 AND "
+ "PublishFreeze.confidentiality < 1 "
+ "AND InbredSet.Name = %s AND PublishXRef.Id = %s",
+ (group_name, dataset_name,)
+ )
+
+ if dataset := cursor.fetchone():
+ if dataset[5]:
+ dataset_dict = {
+ "dataset_type": "phenotype",
+ "id": dataset[0],
+ "name": dataset[1],
+ "description": dataset[2],
+ "pubmed_id": dataset[5],
+ "title": dataset[6],
+ "year": dataset[7]
+ }
+ elif dataset[4]:
+ dataset_dict = {
+ "dataset_type": "phenotype",
+ "id": dataset[0],
+ "name": dataset[3],
+ "description": dataset[4]
+ }
+ else:
+ dataset_dict = {
+ "dataset_type": "phenotype",
+ "id": dataset[0]
+ }
+
+ datasets_list.append(dataset_dict)
+
if len(datasets_list) > 1:
return flask.jsonify(datasets_list)
elif len(datasets_list) == 1:
@@ -403,44 +436,48 @@ def fetch_traits(dataset_name, file_format="json"):
if 'limit_to' in request.args:
limit_number = request.args['limit_to']
query += "LIMIT " + str(limit_number)
-
- if file_format == "json":
- filename = dataset_name + "_traits.json"
-
- final_query = query.format(dataset_id)
-
- result_list = []
- for result in g.db.execute(final_query).fetchall():
- trait_dict = {}
- for i, field in enumerate(field_list):
- if result[i]:
- trait_dict[field] = result[i]
- result_list.append(trait_dict)
-
- return flask.jsonify(result_list)
- elif file_format == "csv":
- filename = dataset_name + "_traits.csv"
-
- results_list = []
- header_list = []
- header_list += field_list
- results_list.append(header_list)
-
- final_query = query.format(dataset_id)
- for result in g.db.execute(final_query).fetchall():
- results_list.append(result)
-
- si = io.StringIO()
- csv_writer = csv.writer(si)
- csv_writer.writerows(results_list)
- output = make_response(si.getvalue())
- output.headers["Content-Disposition"] = "attachment; filename=" + filename
- output.headers["Content-type"] = "text/csv"
- return output
- else:
- return return_error(code=400, source=request.url_rule.rule, title="Invalid Output Format", details="Current formats available are JSON and CSV, with CSV as default")
+ with database_connection() as conn, conn.cursor() as cursor:
+ if file_format == "json":
+ filename = dataset_name + "_traits.json"
+ cursor.execute(query.format(dataset_id))
+ for result in cursor.fetchall():
+ trait_dict = {}
+ for i, field in enumerate(field_list):
+ if result[i]:
+ trait_dict[field] = result[i]
+ result_list.append(trait_dict)
+ return flask.jsonify(result_list)
+ elif file_format == "csv":
+ filename = dataset_name + "_traits.csv"
+
+ results_list = []
+ header_list = []
+ header_list += field_list
+ results_list.append(header_list)
+ cursor.execute(query.format(dataset_id))
+ for result in cursor.fetchall():
+ results_list.append(result)
+
+ si = io.StringIO()
+ csv_writer = csv.writer(si)
+ csv_writer.writerows(results_list)
+ output = make_response(si.getvalue())
+ output.headers["Content-Disposition"] = "attachment; filename=" + filename
+ output.headers["Content-type"] = "text/csv"
+ return output
+ else:
+ return return_error(
+ code=400,
+ source=request.url_rule.rule,
+ title="Invalid Output Format",
+ details="Current formats available are JSON and CSV, with CSV as default"
+ )
else:
- return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+ return return_error(
+ code=204,
+ source=request.url_rule.rule,
+ title="No Results",
+ details="")
@app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version))
@@ -521,20 +558,21 @@ def all_sample_data(dataset_name, file_format="csv"):
header_list.append("id")
header_list += sample_list
results_list.append(header_list)
- for i, trait_id in enumerate(trait_ids):
- line_list = []
- line_list.append(str(trait_names[i]))
- final_query = query.format(dataset_id, trait_id)
- results = g.db.execute(final_query).fetchall()
- results_dict = {}
- for item in results:
- results_dict[item[0]] = item[2]
- for sample in sample_list:
- if sample in results_dict:
- line_list.append(results_dict[sample])
- else:
- line_list.append("x")
- results_list.append(line_list)
+ with database_connection() as conn, conn.cursor() as cursor:
+ for i, trait_id in enumerate(trait_ids):
+ line_list = []
+ line_list.append(str(trait_names[i]))
+ cursor.execute(query.format(dataset_id, trait_id))
+ results = cursor.fetchall()
+ results_dict = {}
+ for item in results:
+ results_dict[item[0]] = item[2]
+ for sample in sample_list:
+ if sample in results_dict:
+ line_list.append(results_dict[sample])
+ else:
+ line_list.append("x")
+ results_list.append(line_list)
results_list = list(map(list, zip(*results_list)))
@@ -554,73 +592,26 @@ def all_sample_data(dataset_name, file_format="csv"):
@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
def trait_sample_data(dataset_name, trait_name, file_format="json"):
- probeset_query = """
- SELECT
- Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error
- FROM
- (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
- LEFT JOIN ProbeSetSE ON
- (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
- WHERE
- ProbeSet.Name = "{0}" AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = "{1}" AND
- ProbeSetXRef.DataId = ProbeSetData.Id AND
- ProbeSetData.StrainId = Strain.Id
- ORDER BY
- Strain.Name
- """.format(trait_name, dataset_name)
-
- probeset_results = g.db.execute(probeset_query)
-
- sample_data = probeset_results.fetchall()
- if len(sample_data) > 0:
- sample_list = []
- for sample in sample_data:
- sample_dict = {
- "sample_name": sample[0],
- "sample_name_2": sample[1],
- "value": sample[2],
- "data_id": sample[3],
- }
- if sample[4]:
- sample_dict["se"] = sample[4]
- sample_list.append(sample_dict)
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, Strain.Name2, "
+ "ProbeSetData.value, ProbeSetData.Id, "
+ "ProbeSetSE.error FROM (ProbeSetData, "
+ "ProbeSetFreeze, Strain, ProbeSet, "
+ "ProbeSetXRef) LEFT JOIN ProbeSetSE ON "
+ "(ProbeSetSE.DataId = ProbeSetData.Id AND "
+ "ProbeSetSE.StrainId = ProbeSetData.StrainId) "
+ "WHERE ProbeSet.Name = %s AND "
+ "ProbeSetXRef.ProbeSetId = ProbeSet.Id "
+ "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetFreeze.Name = %s AND "
+ "ProbeSetXRef.DataId = ProbeSetData.Id "
+ "AND ProbeSetData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name",
+ (trait_name, dataset_name,)
+ )
- return flask.jsonify(sample_list)
- else:
- if not dataset_name.isdigit():
- group_id = get_group_id(dataset_name)
- if group_id:
- dataset_or_group = group_id
- else:
- dataset_or_group = dataset_name
- else:
- dataset_or_group = dataset_name
-
- pheno_query = """
- SELECT DISTINCT
- Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count
- FROM
- (PublishData, Strain, PublishXRef, PublishFreeze)
- LEFT JOIN PublishSE ON
- (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
- LEFT JOIN NStrain ON
- (NStrain.DataId = PublishData.Id AND
- NStrain.StrainId = PublishData.StrainId)
- WHERE
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = "{1}" AND
- (PublishFreeze.Id = "{0}" OR PublishFreeze.Name = "{0}" OR
- PublishFreeze.ShortName = "{0}" OR PublishXRef.InbredSetId = "{0}") AND
- PublishData.StrainId = Strain.Id
- ORDER BY
- Strain.Name
- """.format(dataset_or_group, trait_name)
-
- pheno_results = g.db.execute(pheno_query)
-
- sample_data = pheno_results.fetchall()
+ sample_data = cursor.fetchall()
if len(sample_data) > 0:
sample_list = []
for sample in sample_data:
@@ -628,17 +619,61 @@ def trait_sample_data(dataset_name, trait_name, file_format="json"):
"sample_name": sample[0],
"sample_name_2": sample[1],
"value": sample[2],
- "data_id": sample[3]
+ "data_id": sample[3],
}
if sample[4]:
sample_dict["se"] = sample[4]
- if sample[5]:
- sample_dict["n_cases"] = sample[5]
sample_list.append(sample_dict)
return flask.jsonify(sample_list)
else:
- return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+ if not dataset_name.isdigit():
+ group_id = get_group_id(dataset_name)
+ if group_id:
+ dataset_or_group = group_id
+ else:
+ dataset_or_group = dataset_name
+ else:
+ dataset_or_group = dataset_name
+
+ cursor.execute(
+ "SELECT DISTINCT Strain.Name, Strain.Name2, "
+ "PublishData.value, PublishData.Id, PublishSE.error, "
+ "NStrain.count FROM (PublishData, Strain, "
+ "PublishXRef, PublishFreeze) LEFT JOIN "
+ "PublishSE ON (PublishSE.DataId = PublishData.Id "
+ "AND PublishSE.StrainId = PublishData.StrainId) "
+ "LEFT JOIN NStrain ON "
+ "(NStrain.DataId = PublishData.Id AND "
+ "NStrain.StrainId = PublishData.StrainId) "
+ "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
+ "AND PublishData.Id = PublishXRef.DataId AND "
+ "PublishXRef.Id = %s AND (PublishFreeze.Id = %s "
+ "OR PublishFreeze.Name = %s OR "
+ "PublishFreeze.ShortName = %s OR "
+ "PublishXRef.InbredSetId = %s) AND "
+ "PublishData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name",
+ (trait_name, *(dataset_or_group,)*4)
+ )
+ if len(sample_data := cursor.fetchall()) > 0:
+ sample_list = []
+ for sample in sample_data:
+ sample_dict = {
+ "sample_name": sample[0],
+ "sample_name_2": sample[1],
+ "value": sample[2],
+ "data_id": sample[3]
+ }
+ if sample[4]:
+ sample_dict["se"] = sample[4]
+ if sample[5]:
+ sample_dict["n_cases"] = sample[5]
+ sample_list.append(sample_dict)
+
+ return flask.jsonify(sample_list)
+ else:
+ return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version))
@@ -646,70 +681,64 @@ def trait_sample_data(dataset_name, trait_name, file_format="json"):
@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
def get_trait_info(dataset_name, trait_name, file_format="json"):
- probeset_query = """
- SELECT
- ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias,
- ProbeSetXRef.mean, ProbeSetXRef.se, ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.additive
- FROM
- ProbeSet, ProbeSetXRef, ProbeSetFreeze
- WHERE
- ProbeSet.Name = "{0}" AND
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = "{1}"
- """.format(trait_name, dataset_name)
-
- probeset_results = g.db.execute(probeset_query)
-
- trait_info = probeset_results.fetchone()
- if trait_info:
- trait_dict = {
- "id": trait_info[0],
- "name": trait_info[1],
- "symbol": trait_info[2],
- "description": trait_info[3],
- "chr": trait_info[4],
- "mb": trait_info[5],
- "alias": trait_info[6],
- "mean": trait_info[7],
- "se": trait_info[8],
- "locus": trait_info[9],
- "lrs": trait_info[10],
- "p_value": trait_info[11],
- "additive": trait_info[12]
- }
-
- return flask.jsonify(trait_dict)
- else:
- # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish"
- if "Publish" in dataset_name:
- dataset_name = dataset_name.replace("Publish", "")
-
- group_id = get_group_id(dataset_name)
- pheno_query = """
- SELECT
- PublishXRef.PhenotypeId, PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
- FROM
- PublishXRef
- WHERE
- PublishXRef.Id = "{0}" AND
- PublishXRef.InbredSetId = "{1}"
- """.format(trait_name, group_id)
-
- pheno_results = g.db.execute(pheno_query)
-
- trait_info = pheno_results.fetchone()
- if trait_info:
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, "
+ "ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, "
+ "ProbeSet.alias, ProbeSetXRef.mean, ProbeSetXRef.se, "
+ "ProbeSetXRef.Locus, ProbeSetXRef.LRS, "
+ "ProbeSetXRef.pValue, ProbeSetXRef.additive "
+ "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze "
+ "WHERE ProbeSet.Name = %s AND "
+ "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetFreeze.Name = %s",
+ (trait_name, dataset_name,)
+ )
+ if trait_info := cursor.fetchone():
trait_dict = {
"id": trait_info[0],
- "locus": trait_info[1],
- "lrs": trait_info[2],
- "additive": trait_info[3]
+ "name": trait_info[1],
+ "symbol": trait_info[2],
+ "description": trait_info[3],
+ "chr": trait_info[4],
+ "mb": trait_info[5],
+ "alias": trait_info[6],
+ "mean": trait_info[7],
+ "se": trait_info[8],
+ "locus": trait_info[9],
+ "lrs": trait_info[10],
+ "p_value": trait_info[11],
+ "additive": trait_info[12]
}
return flask.jsonify(trait_dict)
else:
- return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+ # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish"
+ if "Publish" in dataset_name:
+ dataset_name = dataset_name.replace("Publish", "")
+
+ group_id = get_group_id(dataset_name)
+ cursor.execute(
+ "SELECT PublishXRef.PhenotypeId, "
+ "PublishXRef.Locus, PublishXRef.LRS, "
+ "PublishXRef.additive FROM "
+ "PublishXRef WHERE "
+ "PublishXRef.Id = %s AND "
+ "PublishXRef.InbredSetId = %s",
+ (trait_name, group_id,)
+ )
+ if trait_info := cursor.fetchone():
+ trait_dict = {
+ "id": trait_info[0],
+ "locus": trait_info[1],
+ "lrs": trait_info[2],
+ "additive": trait_info[3]
+ }
+
+ return flask.jsonify(trait_dict)
+ else:
+ return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/correlation".format(version), methods=("GET",))
@@ -872,129 +901,108 @@ def get_dataset_trait_ids(dataset_name, start_vars):
limit_string = "LIMIT " + str(start_vars['limit_to'])
else:
limit_string = ""
+ with database_connection() as conn, conn.cursor() as cursor:
+ if "Geno" in dataset_name:
+ data_type = "Geno" # ZS: Need to pass back the dataset type
+ cursor.execute(
+ "SELECT GenoXRef.GenoId, Geno.Name, "
+ "GenoXRef.GenoFreezeId FROM Geno, "
+ "GenoXRef, GenoFreeze WHERE "
+ "Geno.Id = GenoXRef.GenoId AND "
+ "GenoXRef.GenoFreezeId = GenoFreeze.Id "
+ f"AND GenoFreeze.Name = %s {limit_string}",
+ (dataset_name,))
+
+ results = cursor.fetchall()
+
+ trait_ids = [result[0] for result in results]
+ trait_names = [result[1] for result in results]
+ dataset_id = results[0][2]
+ return trait_ids, trait_names, data_type, dataset_id
+
+ elif "Publish" in dataset_name or get_group_id(dataset_name):
+ data_type = "Publish"
+ dataset_name = dataset_name.replace("Publish", "")
+ dataset_id = get_group_id(dataset_name)
+ cursor.execute(
+ "SELECT PublishXRef.PhenotypeId, "
+ "PublishXRef.Id, InbredSet.InbredSetCode "
+ "FROM PublishXRef, InbredSet WHERE "
+ "PublishXRef.InbredSetId = %s AND "
+ "InbredSet.Id = PublishXRef.InbredSetId "
+ f"{limit_string}",
+ (dataset_id,)
+ )
+ results = cursor.fetchall()
+
+ trait_ids = [result[0] for result in results]
+ trait_names = [str(result[2]) + "_" + str(result[1])
+ for result in results]
+
+ return trait_ids, trait_names, data_type, dataset_id
- if "Geno" in dataset_name:
- data_type = "Geno" # ZS: Need to pass back the dataset type
- query = """
- SELECT
- GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId
- FROM
- Geno, GenoXRef, GenoFreeze
- WHERE
- Geno.Id = GenoXRef.GenoId AND
- GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = "{0}"
- {1}
- """.format(dataset_name, limit_string)
-
- results = g.db.execute(query).fetchall()
-
- trait_ids = [result[0] for result in results]
- trait_names = [result[1] for result in results]
- dataset_id = results[0][2]
- return trait_ids, trait_names, data_type, dataset_id
-
- elif "Publish" in dataset_name or get_group_id(dataset_name):
- data_type = "Publish"
- dataset_name = dataset_name.replace("Publish", "")
- dataset_id = get_group_id(dataset_name)
-
- query = """
- SELECT
- PublishXRef.PhenotypeId, PublishXRef.Id, InbredSet.InbredSetCode
- FROM
- PublishXRef, InbredSet
- WHERE
- PublishXRef.InbredSetId = "{0}" AND
- InbredSet.Id = PublishXRef.InbredSetId
- {1}
- """.format(dataset_id, limit_string)
-
- results = g.db.execute(query).fetchall()
-
- trait_ids = [result[0] for result in results]
- trait_names = [str(result[2]) + "_" + str(result[1])
- for result in results]
-
- return trait_ids, trait_names, data_type, dataset_id
-
- else:
- data_type = "ProbeSet"
- query = """
- SELECT
- ProbeSetXRef.ProbeSetId, ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId
- FROM
- ProbeSet, ProbeSetXRef, ProbeSetFreeze
- WHERE
- ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = "{0}"
- {1}
- """.format(dataset_name, limit_string)
-
- results = g.db.execute(query).fetchall()
-
- trait_ids = [result[0] for result in results]
- trait_names = [result[1] for result in results]
- dataset_id = results[0][2]
- return trait_ids, trait_names, data_type, dataset_id
+ else:
+ data_type = "ProbeSet"
+ cursor.execute(
+ "SELECT ProbeSetXRef.ProbeSetId, "
+ "ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId "
+ "FROM ProbeSet, ProbeSetXRef, "
+ "ProbeSetFreeze WHERE "
+ "ProbeSet.Id = ProbeSetXRef.ProbeSetId AND "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ f"AND ProbeSetFreeze.Name = %s {limit_string}",
+ (dataset_name,)
+ )
+ results = cursor.fetchall()
+ trait_ids = [result[0] for result in results]
+ trait_names = [result[1] for result in results]
+ dataset_id = results[0][2]
+ return trait_ids, trait_names, data_type, dataset_id
def get_samplelist(dataset_name):
- group_id = get_group_id_from_dataset(dataset_name)
-
- query = """
- SELECT Strain.Name
- FROM Strain, StrainXRef
- WHERE StrainXRef.StrainId = Strain.Id AND
- StrainXRef.InbredSetId = {}
- """.format(group_id)
-
- results = g.db.execute(query).fetchall()
-
- samplelist = [result[0] for result in results]
-
- return samplelist
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name FROM Strain, StrainXRef "
+ "WHERE StrainXRef.StrainId = Strain.Id AND "
+ "StrainXRef.InbredSetId = %s",
+ (dataset_name,)
+ )
+ # sample list
+ return [result[0] for result in cursor.fetchall()]
def get_group_id_from_dataset(dataset_name):
- if "Publish" in dataset_name:
- query = """
- SELECT
- InbredSet.Id
- FROM
- InbredSet, PublishFreeze
- WHERE
- PublishFreeze.InbredSetId = InbredSet.Id AND
- PublishFreeze.Name = "{}"
- """.format(dataset_name)
- elif "Geno" in dataset_name:
- query = """
- SELECT
- InbredSet.Id
- FROM
- InbredSet, GenoFreeze
- WHERE
- GenoFreeze.InbredSetId = InbredSet.Id AND
- GenoFreeze.Name = "{}"
- """.format(dataset_name)
- else:
- query = """
- SELECT
- InbredSet.Id
- FROM
- InbredSet, ProbeSetFreeze, ProbeFreeze
- WHERE
- ProbeFreeze.InbredSetId = InbredSet.Id AND
- ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
- ProbeSetFreeze.Name = "{}"
- """.format(dataset_name)
-
- result = g.db.execute(query).fetchone()
-
- if len(result) > 0:
- return result[0]
- else:
+ result = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ if "Publish" in dataset_name:
+ cursor.execute(
+ "SELECT InbredSet.Id FROM "
+ "InbredSet, PublishFreeze "
+ "WHERE PublishFreeze.InbredSetId = InbredSet.Id "
+ "AND PublishFreeze.Name = %s",
+ (dataset_name,)
+ )
+ elif "Geno" in dataset_name:
+ cursor.execute(
+ "SELECT InbredSet.Id FROM "
+ "InbredSet, GenoFreeze WHERE "
+ "GenoFreeze.InbredSetId = InbredSet.Id "
+ "AND GenoFreeze.Name = %s",
+ (dataset_name,)
+ )
+ else:
+ cursor.execute(
+ "SELECT InbredSet.Id FROM "
+ "InbredSet, ProbeSetFreeze, "
+ "ProbeFreeze WHERE "
+ "ProbeFreeze.InbredSetId = InbredSet.Id "
+ "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId "
+ "AND ProbeSetFreeze.Name = %s",
+ (dataset_name,)
+ )
+ if result := cursor.fetchone():
+ return result[0]
return None