aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base/data_set.py
diff options
context:
space:
mode:
authorMunyoki Kilyungi2022-09-05 10:21:06 +0300
committerBonfaceKilz2022-09-08 14:26:19 +0300
commit8aef689bb1ab735ad4605af8b8b282672f92d2cf (patch)
tree92506b8120f4223bfe92784ce7bd83d70ecae20b /wqflask/base/data_set.py
parentc341c4565126c47c78f220871d9e334dbe5aec73 (diff)
downloadgenenetwork2-8aef689bb1ab735ad4605af8b8b282672f92d2cf.tar.gz
Replace g.db with database_connection() context manager
* wqflask/base/data_set.py (DatasetType.set_dataset_key): Replace g.db with "database_connection()". (DatasetGroup.get_mapping_methods): Ditto. (DataSet.get_accession_id): Ditto. (DataSet.retrieve_other_names): Rework "if ... else" logic to get rid of "query" and "_vars" variables. Also, nest "try ... except" block within "database_connection". (DataSet.chunk_dataset): Replace g.db with "database_connection()". (DataSet.get_probeset_data): Ditto. (DataSet.get_trait_data): Ditto. (PhenotypeDataSet.get_trait_info): Ditto. (PhenotypeDataSet.retrieve_sample_data): Ditto. (GenotypeDataSet.retrieve_sample_data): Ditto. (MrnaAssayDataSet.get_trait_info): Ditto. (MrnaAssayDataSet.retrieve_sample_data): Ditto. (MrnaAssayDataSet.retrieve_genes): Ditto. (TempDataSet): Ditto. (geno_mrna_confidentiality): Ditto. (query_table_timestamp): Ditto. * wqflask/wqflask/api/router.py: Replace "flask.g" with database_connection import. (get_species_list): Replace g.db with "database_connection()". (get_species_info): Ditto. (get_groups_list): Ditto. (get_group_info): Ditto. (get_datasets_for_group): Ditto. (get_dataset_info): Ditto. (fetch_traits): Ditto. (all_sample_data): Ditto. (trait_sample_data): Ditto. (get_trait_info): Ditto. (get_dataset_trait_ids): Ditto. (get_samplelist): Ditto. (get_group_id_from_dataset): Ditto. (get_group_id): Ditto.
Diffstat (limited to 'wqflask/base/data_set.py')
-rw-r--r--wqflask/base/data_set.py711
1 files changed, 355 insertions, 356 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 2f4c1154..ade38131 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -141,18 +141,18 @@ class DatasetType:
sql_query_mapping = {
'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM "
"ProbeSetFreeze WHERE "
- "ProbeSetFreeze.Name = \"%s\" "),
+ "ProbeSetFreeze.Name = %s "),
'pheno': ("SELECT InfoFiles.GN_AccesionId "
"FROM InfoFiles, PublishFreeze, InbredSet "
- "WHERE InbredSet.Name = '%s' AND "
+ "WHERE InbredSet.Name = %s AND "
"PublishFreeze.InbredSetId = InbredSet.Id AND "
"InfoFiles.InfoPageName = PublishFreeze.Name"),
'other_pheno': ("SELECT PublishFreeze.Name "
"FROM PublishFreeze, InbredSet "
- "WHERE InbredSet.Name = '%s' AND "
+ "WHERE InbredSet.Name = %s AND "
"PublishFreeze.InbredSetId = InbredSet.Id"),
'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE "
- "GenoFreeze.Name = \"%s\" ")
+ "GenoFreeze.Name = %s ")
}
dataset_name_mapping = {
@@ -166,13 +166,14 @@ class DatasetType:
if t in ['pheno', 'other_pheno']:
group_name = name.replace("Publish", "")
- results = g.db.execute(sql_query_mapping[t] % group_name).fetchone()
- if results:
- self.datasets[name] = dataset_name_mapping[t]
- self.redis_instance.set(
- "dataset_structure", json.dumps(self.datasets))
- return True
- return None
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(sql_query_mapping[t], (group_name,))
+ if cursor.fetchone():
+ self.datasets[name] = dataset_name_mapping[t]
+ self.redis_instance.set(
+ "dataset_structure", json.dumps(self.datasets))
+ return True
+
def __call__(self, name):
if name not in self.datasets:
@@ -348,9 +349,13 @@ class DatasetGroup:
self.genofile = None
def get_mapping_methods(self):
-
- mapping_id = g.db.execute(
- "select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0]
+ mapping_id = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT MappingMethodId FROM "
+ "InbredSet WHERE Name= %s"
+ (self.name,))
+ mapping_id = cursor.fetchone()[0]
if mapping_id == "1":
mapping_names = ["GEMMA", "QTLReaper", "R/qtl"]
elif mapping_id == "2":
@@ -589,29 +594,38 @@ class DataSet:
}
def get_accession_id(self):
- if self.type == "Publish":
- results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
- InbredSet.Name = %s and
- PublishFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = PublishFreeze.Name and
- PublishFreeze.public > 0 and
- PublishFreeze.confidentiality < 1 order by
- PublishFreeze.CreateTime desc""", (self.group.name)).fetchone()
- elif self.type == "Geno":
- results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
- InbredSet.Name = %s and
- GenoFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = GenoFreeze.ShortName and
- GenoFreeze.public > 0 and
- GenoFreeze.confidentiality < 1 order by
- GenoFreeze.CreateTime desc""", (self.group.name)).fetchone()
- else:
- results = None
+ results = None
+ with database_connection() as conn, conn.cursor() as cursor:
+ if self.type == "Publish":
+ cursor.execute(
+ "SELECT InfoFiles.GN_AccesionId FROM "
+ "InfoFiles, PublishFreeze, InbredSet "
+ "WHERE InbredSet.Name = %s AND "
+ "PublishFreeze.InbredSetId = InbredSet.Id "
+ "AND InfoFiles.InfoPageName = PublishFreeze.Name "
+ "AND PublishFreeze.public > 0 AND "
+ "PublishFreeze.confidentiality < 1 "
+ "ORDER BY PublishFreeze.CreateTime DESC",
+ (self.group.name,)
+ )
+ results = cursor.fetchone()
+ elif self.type == "Geno":
+ cursor.execute(
+ "SELECT InfoFiles.GN_AccesionId FROM "
+ "InfoFiles, GenoFreeze, InbredSet "
+ "WHERE InbredSet.Name = %s AND "
+ "GenoFreeze.InbredSetId = InbredSet.Id "
+ "AND InfoFiles.InfoPageName = GenoFreeze.ShortName "
+ "AND GenoFreeze.public > 0 AND "
+ "GenoFreeze.confidentiality < 1 "
+ "ORDER BY GenoFreeze.CreateTime DESC",
+ (self.group.name,)
+ )
+ results = cursor.fetchone()
- if results != None:
+ if results:
return str(results[0])
- else:
- return "None"
+ return "None"
def retrieve_other_names(self):
"""This method fetches the the dataset names in search_result.
@@ -624,52 +638,49 @@ class DataSet:
all is passed.
"""
-
- try:
- query = ""
- _vars = None
- query_args = (self.name, self.name, self.name)
- if self.type == "ProbeSet":
- query = (
- "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, "
- "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, "
- "ProbeSetFreeze.DataScale, Tissue.Name "
- "FROM ProbeSetFreeze, ProbeFreeze, Tissue "
- "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
- "AND ProbeFreeze.TissueId = Tissue.Id "
- "AND (ProbeSetFreeze.Name = %s OR ProbeSetFreeze.FullName = %s OR ProbeSetFreeze.ShortName = %s)")
- else:
- query = (
- "SELECT Id, Name, FullName, ShortName "
- f"FROM {self.type}Freeze "
- "WHERE (Name = %s OR FullName = %s OR ShortName = %s)")
- self.tissue = "N/A"
- with database_connection() as conn, conn.cursor() as cursor:
- cursor.execute(query, query_args)
- _vars = cursor.fetchone()
+ with database_connection() as conn, conn.cursor() as cursor:
+ try:
if self.type == "ProbeSet":
+ cursor.execute(
+ "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, "
+ "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, "
+ "ProbeSetFreeze.DataScale, Tissue.Name "
+ "FROM ProbeSetFreeze, ProbeFreeze, Tissue "
+ "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
+ "AND ProbeFreeze.TissueId = Tissue.Id "
+ "AND (ProbeSetFreeze.Name = %s OR "
+ "ProbeSetFreeze.FullName = %s "
+ "OR ProbeSetFreeze.ShortName = %s)",
+ (self.name,)*3)
(self.id, self.name, self.fullname, self.shortname,
- self.data_scale, self.tissue) = _vars
+ self.data_scale, self.tissue) = cursor.fetchone()
else:
- self.id, self.name, self.fullname, self.shortname = _vars
- except TypeError:
- pass
+ cursor.execute(
+ "SELECT Id, Name, FullName, ShortName "
+ f"FROM {self.type}Freeze "
+ "WHERE (Name = %s OR FullName = "
+ "%s OR ShortName = %s)",
+ (self.name,)*3)
+ (self.id, self.name, self.fullname, self.shortname,
+ self.data_scale, self.tissue) = cursor.fetchone()
+ self.tissue = "N/A"
+ except TypeError:
+ pass
def chunk_dataset(self, dataset, n):
results = {}
-
- query = """
- SELECT ProbeSetXRef.DataId,ProbeSet.Name
- FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze
- WHERE ProbeSetFreeze.Name = '{}' AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetXRef.ProbeSetId = ProbeSet.Id
- """.format(self.name)
-
- # should cache this
-
- traits_name_dict = dict(g.db.execute(query).fetchall())
+ traits_name_dict = ()
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT ProbeSetXRef.DataId,ProbeSet.Name "
+ "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze "
+ "WHERE ProbeSetFreeze.Name = %s AND "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id",
+ (self.name,))
+ # should cache this
+ traits_name_dict = dict(cursor.fetchall())
for i in range(0, len(dataset), n):
matrix = list(dataset[i:i + n])
@@ -691,31 +702,35 @@ class DataSet:
if self.group.parlist != None and self.group.f1list != None:
if (self.group.parlist + self.group.f1list) in self.samplelist:
self.samplelist += self.group.parlist + self.group.f1list
-
- query = """
- SELECT Strain.Name, Strain.Id FROM Strain, Species
- WHERE Strain.Name IN {}
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- results = dict(g.db.execute(query).fetchall())
- sample_ids = [results[item] for item in self.samplelist]
-
- sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
- results.items(), key=lambda item: item[1])]
-
- query = """SELECT * from ProbeSetData
- where StrainID in {}
- and id in (SELECT ProbeSetXRef.DataId
- FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
- WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Name = '{}'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name)
-
- query_results = list(g.db.execute(query).fetchall())
- data_results = self.chunk_dataset(query_results, len(sample_ids))
- self.samplelist = sorted_samplelist
- self.trait_data = data_results
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, Strain.Id FROM "
+ "Strain, Species WHERE Strain.Name IN "
+ f"{create_in_clause(self.samplelist)} "
+ "AND Strain.SpeciesId=Species.Id AND "
+ "Species.name = %s", (self.group.species,)
+ )
+ results = dict(cursor.fetchall())
+ sample_ids = [results[item] for item in self.samplelist]
+
+ sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
+ results.items(), key=lambda item: item[1])]
+
+ cursor.execute(
+ "SELECT * from ProbeSetData WHERE StrainID IN "
+ f"{create_in_clause(sample_ids)} AND id IN "
+ "(SELECT ProbeSetXRef.DataId FROM "
+ "(ProbeSet, ProbeSetXRef, ProbeSetFreeze) "
+ "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetFreeze.Name = %s AND "
+ "ProbeSet.Id = ProbeSetXRef.ProbeSetId)",
+ (self.name,)
+ )
+
+ query_results = list(cursor.fetchall())
+ data_results = self.chunk_dataset(query_results, len(sample_ids))
+ self.samplelist = sorted_samplelist
+ self.trait_data = data_results
def get_trait_data(self, sample_list=None):
if sample_list:
@@ -727,81 +742,84 @@ class DataSet:
if (self.group.parlist + self.group.f1list) in self.samplelist:
self.samplelist += self.group.parlist + self.group.f1list
- query = """
- SELECT Strain.Name, Strain.Id FROM Strain, Species
- WHERE Strain.Name IN {}
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- results = dict(g.db.execute(query).fetchall())
- sample_ids = [results.get(item)
- for item in self.samplelist if item is not None]
-
- # MySQL limits the number of tables that can be used in a join to 61,
- # so we break the sample ids into smaller chunks
- # Postgres doesn't have that limit, so we can get rid of this after we transition
- chunk_size = 50
- number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
-
- cached_results = fetch_cached_results(self.name, self.type, self.samplelist)
- if cached_results is None:
- trait_sample_data = []
- for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
- if self.type == "Publish":
- dataset_type = "Phenotype"
- else:
- dataset_type = self.type
- temp = ['T%s.value' % item for item in sample_ids_step]
- if self.type == "Publish":
- query = "SELECT {}XRef.Id,".format(escape(self.type))
- else:
- query = "SELECT {}.Name,".format(escape(dataset_type))
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, Strain.Id FROM Strain, Species "
+ f"WHERE Strain.Name IN {create_in_clause(self.samplelist)} "
+ "AND Strain.SpeciesId=Species.Id "
+ "AND Species.name = %s",
+ (self.group.species,)
+ )
+ results = dict(cursor.fetchall())
+ sample_ids = [results.get(item)
+ for item in self.samplelist if item is not None]
+
+ # MySQL limits the number of tables that can be used in a join to 61,
+ # so we break the sample ids into smaller chunks
+ # Postgres doesn't have that limit, so we can get rid of this after we transition
+ chunk_size = 50
+ number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
+
+ cached_results = fetch_cached_results(self.name, self.type, self.samplelist)
+
+ if cached_results is None:
+ trait_sample_data = []
+ for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
+ if self.type == "Publish":
+ dataset_type = "Phenotype"
+ else:
+ dataset_type = self.type
+ temp = ['T%s.value' % item for item in sample_ids_step]
+ if self.type == "Publish":
+ query = "SELECT {}XRef.Id,".format(escape(self.type))
+ else:
+ query = "SELECT {}.Name,".format(escape(dataset_type))
+ data_start_pos = 1
+ query += ', '.join(temp)
+ query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
+ self.type,
+ self.type))
+
+ for item in sample_ids_step:
+ query += """
+ left join {}Data as T{} on T{}.Id = {}XRef.DataId
+ and T{}.StrainId={}\n
+ """.format(*mescape(self.type, item, item, self.type, item, item))
+
+ if self.type == "Publish":
+ query += """
+ WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
+ and {}Freeze.Name = '{}'
+ and {}.Id = {}XRef.{}Id
+ order by {}.Id
+ """.format(*mescape(self.type, self.type, self.type, self.name,
+ dataset_type, self.type, dataset_type, dataset_type))
+ else:
+ query += """
+ WHERE {}XRef.{}FreezeId = {}Freeze.Id
+ and {}Freeze.Name = '{}'
+ and {}.Id = {}XRef.{}Id
+ order by {}.Id
+ """.format(*mescape(self.type, self.type, self.type, self.type,
+ self.name, dataset_type, self.type, self.type, dataset_type))
+ cursor.execute(query)
+ results = cursor.fetchall()
+ trait_sample_data.append([list(result) for result in results])
+
+ trait_count = len(trait_sample_data[0])
+ self.trait_data = collections.defaultdict(list)
+
data_start_pos = 1
- query += ', '.join(temp)
- query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
- self.type,
- self.type))
-
- for item in sample_ids_step:
- query += """
- left join {}Data as T{} on T{}.Id = {}XRef.DataId
- and T{}.StrainId={}\n
- """.format(*mescape(self.type, item, item, self.type, item, item))
-
- if self.type == "Publish":
- query += """
- WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
- and {}Freeze.Name = '{}'
- and {}.Id = {}XRef.{}Id
- order by {}.Id
- """.format(*mescape(self.type, self.type, self.type, self.name,
- dataset_type, self.type, dataset_type, dataset_type))
- else:
- query += """
- WHERE {}XRef.{}FreezeId = {}Freeze.Id
- and {}Freeze.Name = '{}'
- and {}.Id = {}XRef.{}Id
- order by {}.Id
- """.format(*mescape(self.type, self.type, self.type, self.type,
- self.name, dataset_type, self.type, self.type, dataset_type))
-
- results = g.db.execute(query).fetchall()
- trait_sample_data.append([list(result) for result in results])
-
- trait_count = len(trait_sample_data[0])
- self.trait_data = collections.defaultdict(list)
-
- data_start_pos = 1
- for trait_counter in range(trait_count):
- trait_name = trait_sample_data[0][trait_counter][0]
- for chunk_counter in range(int(number_chunks)):
- self.trait_data[trait_name] += (
- trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
-
- cache_dataset_results(
- self.name, self.type, self.samplelist, self.trait_data)
- else:
- self.trait_data = cached_results
+ for trait_counter in range(trait_count):
+ trait_name = trait_sample_data[0][trait_counter][0]
+ for chunk_counter in range(int(number_chunks)):
+ self.trait_data[trait_name] += (
+ trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
+
+ cache_dataset_results(
+ self.name, self.type, self.samplelist, self.trait_data)
+ else:
+ self.trait_data = cached_results
class PhenotypeDataSet(DataSet):
@@ -897,43 +915,41 @@ SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType, InbredSet.InbredSetC
this_trait.LRS_location_repr = "N/A"
if this_trait.lrs:
- query = """
- select Geno.Chr, Geno.Mb from Geno, Species
- where Species.Name = '%s' and
- Geno.Name = '%s' and
- Geno.SpeciesId = Species.Id
- """ % (species, this_trait.locus)
- result = g.db.execute(query).fetchone()
-
- if result:
- if result[0] and result[1]:
- LRS_Chr = result[0]
- LRS_Mb = result[1]
-
- this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
- this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
- LRS_Chr, float(LRS_Mb))
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Geno.Chr, Geno.Mb FROM "
+ "Geno, Species WHERE "
+ "Species.Name = %s AND "
+ "Geno.Name = %s AND "
+ "Geno.SpeciesId = Species.Id",
+ (species, this_trait.locus,)
+ )
+ if result := cursor.fetchone():
+ if result[0] and result[1]:
+ LRS_Chr, LRS_Mb = result[0], result[1]
+ this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
+ this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
+ LRS_Chr, float(LRS_Mb))
def retrieve_sample_data(self, trait):
- query = """
- SELECT
- Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2
- FROM
- (PublishData, Strain, PublishXRef, PublishFreeze)
- left join PublishSE on
- (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
- left join NStrain on
- (NStrain.DataId = PublishData.Id AND
- NStrain.StrainId = PublishData.StrainId)
- WHERE
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
- PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
- results = g.db.execute(query, (trait, self.id)).fetchall()
- return results
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, PublishData.value, "
+ "PublishSE.error, NStrain.count, "
+ "Strain.Name2 FROM (PublishData, Strain, "
+ "PublishXRef, PublishFreeze) LEFT JOIN "
+ "PublishSE ON "
+ "(PublishSE.DataId = PublishData.Id "
+ "AND PublishSE.StrainId = PublishData.StrainId) "
+ "LEFT JOIN NStrain ON "
+ "(NStrain.DataId = PublishData.Id AND "
+ "NStrain.StrainId = PublishData.StrainId) "
+ "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
+ "AND PublishData.Id = PublishXRef.DataId AND "
+ "PublishXRef.Id = %s AND PublishFreeze.Id = %s "
+ "AND PublishData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name", (trait, self.id))
+ return cursor.fetchall()
class GenotypeDataSet(DataSet):
@@ -976,25 +992,25 @@ GenoFreeze.Name = %s"""
this_trait.chr, float(this_trait.mb))
def retrieve_sample_data(self, trait):
- query = """
- SELECT
- Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
- FROM
- (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
- left join GenoSE on
- (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
- WHERE
- Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
- GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = %s AND
- GenoXRef.DataId = GenoData.Id AND
- GenoData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
- results = g.db.execute(query,
- (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
- trait, self.name)).fetchall()
+ results = []
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, GenoData.value, "
+ "GenoSE.error, 'N/A', Strain.Name2 "
+ "FROM (GenoData, GenoFreeze, Strain, Geno, "
+ "GenoXRef) LEFT JOIN GenoSE ON "
+ "(GenoSE.DataId = GenoData.Id AND "
+ "GenoSE.StrainId = GenoData.StrainId) "
+ "WHERE Geno.SpeciesId = %s AND "
+ "Geno.Name = %s AND GenoXRef.GenoId = Geno.Id "
+ "AND GenoXRef.GenoFreezeId = GenoFreeze.Id "
+ "AND GenoFreeze.Name = %s AND "
+ "GenoXRef.DataId = GenoData.Id "
+ "AND GenoData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name",
+ (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
+ trait, self.name,))
+ results = list(cursor.fetchall())
if self.group.name in webqtlUtil.ParInfo:
f1_1, f1_2, ref, nonref = webqtlUtil.ParInfo[self.group.name]
@@ -1073,114 +1089,113 @@ ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND ProbeSetFreeze.Name = %s"""
# Note: setting trait_list to [] is probably not a great idea.
if not trait_list:
trait_list = []
+ with database_connection() as conn, conn.cursor() as cursor:
+ for this_trait in trait_list:
- for this_trait in trait_list:
-
- if not this_trait.haveinfo:
- this_trait.retrieveInfo(QTL=1)
-
- if not this_trait.symbol:
- this_trait.symbol = "N/A"
-
- # XZ, 12/08/2008: description
- # XZ, 06/05/2009: Rob asked to add probe target description
- description_string = str(
- str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
- target_string = str(
- str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
-
- if len(description_string) > 1 and description_string != 'None':
- description_display = description_string
- else:
- description_display = this_trait.symbol
-
- if (len(description_display) > 1 and description_display != 'N/A'
- and len(target_string) > 1 and target_string != 'None'):
- description_display = description_display + '; ' + target_string.strip()
-
- # Save it for the jinja2 template
- this_trait.description_display = description_display
-
- if this_trait.chr and this_trait.mb:
- this_trait.location_repr = 'Chr%s: %.6f' % (
- this_trait.chr, float(this_trait.mb))
-
- # Get mean expression value
- query = (
- """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
- where ProbeSetXRef.ProbeSetFreezeId = %s and
- ProbeSet.Id = ProbeSetXRef.ProbeSetId and
- ProbeSet.Name = '%s'
- """ % (escape(str(this_trait.dataset.id)),
- escape(this_trait.name)))
- result = g.db.execute(query).fetchone()
+ if not this_trait.haveinfo:
+ this_trait.retrieveInfo(QTL=1)
- mean = result[0] if result else 0
+ if not this_trait.symbol:
+ this_trait.symbol = "N/A"
- if mean:
- this_trait.mean = "%2.3f" % mean
+ # XZ, 12/08/2008: description
+ # XZ, 06/05/2009: Rob asked to add probe target description
+ description_string = str(
+ str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
+ target_string = str(
+ str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
- # LRS and its location
- this_trait.LRS_score_repr = 'N/A'
- this_trait.LRS_location_repr = 'N/A'
-
- # Max LRS and its Locus location
- if this_trait.lrs and this_trait.locus:
- query = """
- select Geno.Chr, Geno.Mb from Geno, Species
- where Species.Name = '{}' and
- Geno.Name = '{}' and
- Geno.SpeciesId = Species.Id
- """.format(species, this_trait.locus)
- result = g.db.execute(query).fetchone()
-
- if result:
- lrs_chr, lrs_mb = result
- this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
- this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
- lrs_chr, float(lrs_mb))
+ if len(description_string) > 1 and description_string != 'None':
+ description_display = description_string
+ else:
+ description_display = this_trait.symbol
+
+ if (len(description_display) > 1 and description_display != 'N/A'
+ and len(target_string) > 1 and target_string != 'None'):
+ description_display = description_display + '; ' + target_string.strip()
+
+ # Save it for the jinja2 template
+ this_trait.description_display = description_display
+
+ if this_trait.chr and this_trait.mb:
+ this_trait.location_repr = 'Chr%s: %.6f' % (
+ this_trait.chr, float(this_trait.mb))
+
+ # Get mean expression value
+ cursor.execute(
+ "SELECT ProbeSetXRef.mean FROM "
+ "ProbeSetXRef, ProbeSet WHERE "
+ "ProbeSetXRef.ProbeSetFreezeId = %s "
+ "AND ProbeSet.Id = ProbeSetXRef.ProbeSetId "
+ "AND ProbeSet.Name = %s",
+ (str(this_trait.dataset.id), this_trait.name,)
+ )
+ result = cursor.fetchone()
+
+ mean = result[0] if result else 0
+
+ if mean:
+ this_trait.mean = "%2.3f" % mean
+
+ # LRS and its location
+ this_trait.LRS_score_repr = 'N/A'
+ this_trait.LRS_location_repr = 'N/A'
+
+ # Max LRS and its Locus location
+ if this_trait.lrs and this_trait.locus:
+ cursor.execute(
+ "SELECT Geno.Chr, Geno.Mb FROM "
+ "Geno, Species WHERE "
+ "Species.Name = %s AND "
+ "Geno.Name = %s AND "
+ "Geno.SpeciesId = Species.Id",
+ (species, this_trait.locus,)
+ )
+ if result := cursor.fetchone():
+ lrs_chr, lrs_mb = result
+ this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
+ this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
+ lrs_chr, float(lrs_mb))
return trait_list
def retrieve_sample_data(self, trait):
- query = """
- SELECT
- Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
- FROM
- (ProbeSetData, ProbeSetFreeze,
- Strain, ProbeSet, ProbeSetXRef)
- left join ProbeSetSE on
- (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
- left join NStrain on
- (NStrain.DataId = ProbeSetData.Id AND
- NStrain.StrainId = ProbeSetData.StrainId)
- WHERE
- ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = '%s' AND
- ProbeSetXRef.DataId = ProbeSetData.Id AND
- ProbeSetData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """ % (escape(trait), escape(self.name))
- results = g.db.execute(query).fetchall()
- return results
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT Strain.Name, ProbeSetData.value, "
+ "ProbeSetSE.error, NStrain.count, "
+ "Strain.Name2 FROM (ProbeSetData, "
+ "ProbeSetFreeze, Strain, ProbeSet, "
+ "ProbeSetXRef) LEFT JOIN ProbeSetSE ON "
+ "(ProbeSetSE.DataId = ProbeSetData.Id AND "
+ "ProbeSetSE.StrainId = ProbeSetData.StrainId) "
+ "LEFT JOIN NStrain ON "
+ "(NStrain.DataId = ProbeSetData.Id AND "
+ "NStrain.StrainId = ProbeSetData.StrainId) "
+ "WHERE ProbeSet.Name = %s AND "
+ "ProbeSetXRef.ProbeSetId = ProbeSet.Id "
+ "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetFreeze.Name = %s AND "
+ "ProbeSetXRef.DataId = ProbeSetData.Id "
+ "AND ProbeSetData.StrainId = Strain.Id "
+ "ORDER BY Strain.Name",
+ (trait, self.name)
+ )
+ return cursor.fetchall()
def retrieve_genes(self, column_name):
- query = """
- select ProbeSet.Name, ProbeSet.%s
- from ProbeSet,ProbeSetXRef
- where ProbeSetXRef.ProbeSetFreezeId = %s and
- ProbeSetXRef.ProbeSetId=ProbeSet.Id;
- """ % (column_name, escape(str(self.id)))
- results = g.db.execute(query).fetchall()
-
- return dict(results)
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ f"SELECT ProbeSet.Name, ProbeSet.{column_name} "
+ "FROM ProbeSet,ProbeSetXRef WHERE "
+ "ProbeSetXRef.ProbeSetFreezeId = %s "
+ "AND ProbeSetXRef.ProbeSetId=ProbeSet.Id",
+ (str(self.id),))
+ return dict(cursor.fetchall())
class TempDataSet(DataSet):
- '''Temporary user-generated data set'''
-
+ """Temporary user-generated data set"""
DS_NAME_MAP['Temp'] = 'TempDataSet'
def setup(self):
@@ -1202,30 +1217,15 @@ class TempDataSet(DataSet):
def geno_mrna_confidentiality(ob):
- dataset_table = ob.type + "Freeze"
-
- query = '''SELECT Id, Name, FullName, confidentiality,
- AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
- result = g.db.execute(query)
-
- (dataset_id,
- name,
- full_name,
- confidential,
- authorized_users) = result.fetchall()[0]
-
- if confidential:
- return True
- result = g.db.execute(query)
-
- (dataset_id,
- name,
- full_name,
- confidential,
- authorized_users) = result.fetchall()[0]
-
- if confidential:
- return True
+ with database_connection() as conn, conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT confidentiality, "
+ "AuthorisedUsers FROM %s WHERE Name = %s",
+ (f"{ob.type}Freeze", ob.name,)
+ )
+ result = cursor.fetchall()[0]
+ if result:
+ return True
def parse_db_url():
@@ -1239,16 +1239,15 @@ def query_table_timestamp(dataset_type: str):
"""function to query the update timestamp of a given dataset_type"""
# computation data and actions
-
- fetch_db_name = parse_db_url()
- query_update_time = f"""
- SELECT UPDATE_TIME FROM information_schema.tables
- WHERE TABLE_SCHEMA = '{fetch_db_name[-1]}'
- AND TABLE_NAME = '{dataset_type}Data'
- """
-
- date_time_obj = g.db.execute(query_update_time).fetchone()[0]
- return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
+ with database_connection() as conn, conn.cursor() as cursor:
+ fetch_db_name = parse_db_url()
+ cursor.execute(
+ "SELECT UPDATE_TIME FROM "
+ "information_schema.tables "
+ f"WHERE TABLE_SCHEMA = '{fetch_db_name[-1]}' "
+ f"AND TABLE_NAME = '{dataset_type}Data'")
+ date_time_obj = cursor.fetchone()[0]
+ return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
def generate_hash_file(dataset_name: str, dataset_type: str, dataset_timestamp: str, samplelist: str):