aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base
diff options
context:
space:
mode:
authorzsloan2021-10-18 17:50:26 +0000
committerzsloan2021-10-18 17:50:26 +0000
commite36eaf0003a598bc5aa688803dd1b36c24a4c051 (patch)
treea59b7dadf02241575eb0774f97c6048e2425c053 /wqflask/base
parentbd421438f1f0b4de913fa40cd49cfcda27e6b16f (diff)
parent04f3d13aceeaec2e52b94037d59f08ed6dc6a8bb (diff)
downloadgenenetwork2-e36eaf0003a598bc5aa688803dd1b36c24a4c051.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into feature/remove_trait_creation_from_search
Diffstat (limited to 'wqflask/base')
-rw-r--r--wqflask/base/GeneralObject.py4
-rw-r--r--wqflask/base/data_set.py250
-rw-r--r--wqflask/base/mrna_assay_tissue_data.py38
-rw-r--r--wqflask/base/species.py89
-rw-r--r--wqflask/base/trait.py114
-rw-r--r--wqflask/base/webqtlCaseData.py9
-rw-r--r--wqflask/base/webqtlConfig.py42
7 files changed, 294 insertions, 252 deletions
diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py
index 249195e2..ce8e60b8 100644
--- a/wqflask/base/GeneralObject.py
+++ b/wqflask/base/GeneralObject.py
@@ -62,5 +62,5 @@ class GeneralObject:
return s
def __eq__(self, other):
- return (len(list(self.__dict__.keys())) ==
- len(list(other.__dict__.keys())))
+ return (len(list(self.__dict__.keys()))
+ == len(list(other.__dict__.keys())))
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 178234fe..8906ab69 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -17,7 +17,10 @@
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
# This module is used by GeneNetwork project (www.genenetwork.org)
-
+from dataclasses import dataclass
+from dataclasses import field
+from dataclasses import InitVar
+from typing import Optional, Dict
from db.call import fetchall, fetchone, fetch1
from utility.logger import getLogger
from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL
@@ -59,7 +62,8 @@ logger = getLogger(__name__)
DS_NAME_MAP = {}
-def create_dataset(dataset_name, dataset_type=None, get_samplelist=True, group_name=None):
+def create_dataset(dataset_name, dataset_type=None,
+ get_samplelist=True, group_name=None):
if dataset_name == "Temp":
dataset_type = "Temp"
@@ -74,11 +78,10 @@ def create_dataset(dataset_name, dataset_type=None, get_samplelist=True, group_n
return dataset_class(dataset_name, get_samplelist)
+@dataclass
class DatasetType:
-
- def __init__(self, redis_instance):
- """Create a dictionary of samples where the value is set to Geno,
-Publish or ProbeSet. E.g.
+ """Create a dictionary of samples where the value is set to Geno,
+ Publish or ProbeSet. E.g.
{'AD-cases-controls-MyersGeno': 'Geno',
'AD-cases-controls-MyersPublish': 'Publish',
@@ -89,21 +92,28 @@ Publish or ProbeSet. E.g.
'All Phenotypes': 'Publish',
'B139_K_1206_M': 'ProbeSet',
'B139_K_1206_R': 'ProbeSet' ...
-
+ }
"""
+ redis_instance: InitVar[Redis]
+ datasets: Optional[Dict] = field(init=False, default_factory=dict)
+ data: Optional[Dict] = field(init=False)
+
+ def __post_init__(self, redis_instance):
self.redis_instance = redis_instance
- self.datasets = {}
- data = self.redis_instance.get("dataset_structure")
+ data = redis_instance.get("dataset_structure")
if data:
self.datasets = json.loads(data)
- else: # ZS: I don't think this should ever run unless Redis is emptied
+ else:
+ # ZS: I don't think this should ever run unless Redis is
+ # emptied
try:
data = json.loads(requests.get(
- GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content)
- for species in data['datasets']:
- for group in data['datasets'][species]:
- for dataset_type in data['datasets'][species][group]:
- for dataset in data['datasets'][species][group][dataset_type]:
+ GN2_BASE_URL + "/api/v_pre1/gen_dropdown",
+ timeout=5).content)
+ for _species in data['datasets']:
+ for group in data['datasets'][_species]:
+ for dataset_type in data['datasets'][_species][group]:
+ for dataset in data['datasets'][_species][group][dataset_type]:
short_dataset_name = dataset[1]
if dataset_type == "Phenotypes":
new_type = "Publish"
@@ -112,15 +122,16 @@ Publish or ProbeSet. E.g.
else:
new_type = "ProbeSet"
self.datasets[short_dataset_name] = new_type
- except:
+ except Exception: # Do nothing
pass
- self.redis_instance.set("dataset_structure", json.dumps(self.datasets))
+ self.redis_instance.set("dataset_structure",
+ json.dumps(self.datasets))
+ self.data = data
def set_dataset_key(self, t, name):
- """If name is not in the object's dataset dictionary, set it, and update
- dataset_structure in Redis
-
+ """If name is not in the object's dataset dictionary, set it, and
+ update dataset_structure in Redis
args:
t: Type of dataset structure which can be: 'mrna_expr', 'pheno',
'other_pheno', 'geno'
@@ -128,19 +139,20 @@ Publish or ProbeSet. E.g.
"""
sql_query_mapping = {
- 'mrna_expr': ("""SELECT ProbeSetFreeze.Id FROM """ +
- """ProbeSetFreeze WHERE ProbeSetFreeze.Name = "{}" """),
- 'pheno': ("""SELECT InfoFiles.GN_AccesionId """ +
- """FROM InfoFiles, PublishFreeze, InbredSet """ +
- """WHERE InbredSet.Name = '{}' AND """ +
- """PublishFreeze.InbredSetId = InbredSet.Id AND """ +
- """InfoFiles.InfoPageName = PublishFreeze.Name"""),
- 'other_pheno': ("""SELECT PublishFreeze.Name """ +
- """FROM PublishFreeze, InbredSet """ +
- """WHERE InbredSet.Name = '{}' AND """ +
- """PublishFreeze.InbredSetId = InbredSet.Id"""),
- 'geno': ("""SELECT GenoFreeze.Id FROM GenoFreeze WHERE """ +
- """GenoFreeze.Name = "{}" """)
+ 'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM "
+ "ProbeSetFreeze WHERE "
+ "ProbeSetFreeze.Name = \"%s\" "),
+ 'pheno': ("SELECT InfoFiles.GN_AccesionId "
+ "FROM InfoFiles, PublishFreeze, InbredSet "
+ "WHERE InbredSet.Name = '%s' AND "
+ "PublishFreeze.InbredSetId = InbredSet.Id AND "
+ "InfoFiles.InfoPageName = PublishFreeze.Name"),
+ 'other_pheno': ("SELECT PublishFreeze.Name "
+ "FROM PublishFreeze, InbredSet "
+ "WHERE InbredSet.Name = '%s' AND "
+ "PublishFreeze.InbredSetId = InbredSet.Id"),
+ 'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE "
+ "GenoFreeze.Name = \"%s\" ")
}
dataset_name_mapping = {
@@ -154,22 +166,23 @@ Publish or ProbeSet. E.g.
if t in ['pheno', 'other_pheno']:
group_name = name.replace("Publish", "")
- results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone()
+ results = g.db.execute(sql_query_mapping[t] % group_name).fetchone()
if results:
self.datasets[name] = dataset_name_mapping[t]
- self.redis_instance.set("dataset_structure", json.dumps(self.datasets))
+ self.redis_instance.set(
+ "dataset_structure", json.dumps(self.datasets))
return True
-
return None
def __call__(self, name):
-
if name not in self.datasets:
for t in ["mrna_expr", "pheno", "other_pheno", "geno"]:
- # This has side-effects, with the end result being a truth-y value
+ # This has side-effects, with the end result being a
+ # truth-y value
if(self.set_dataset_key(t, name)):
break
- return self.datasets.get(name, None) # Return None if name has not been set
+ # Return None if name has not been set
+ return self.datasets.get(name, None)
# Do the intensive work at startup one time only
@@ -204,12 +217,12 @@ def create_datasets_list():
if USE_REDIS:
r.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL))
- r.expire(key, 60*60)
+ r.expire(key, 60 * 60)
return datasets
-class Markers(object):
+class Markers:
"""Todo: Build in cacheing so it saves us reading the same file more than once"""
def __init__(self, name):
@@ -228,7 +241,8 @@ class Markers(object):
for line in bimbam_fh:
marker = {}
marker['name'] = line.split(delimiter)[0].rstrip()
- marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000
+ marker['Mb'] = float(line.split(delimiter)[
+ 1].rstrip()) / 1000000
marker['chr'] = line.split(delimiter)[2].rstrip()
markers.append(marker)
@@ -262,10 +276,7 @@ class Markers(object):
elif isinstance(p_values, dict):
filtered_markers = []
for marker in self.markers:
- #logger.debug("marker[name]", marker['name'])
- #logger.debug("p_values:", p_values)
if marker['name'] in p_values:
- #logger.debug("marker {} IS in p_values".format(i))
marker['p_value'] = p_values[marker['name']]
if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
marker['lod_score'] = 0
@@ -276,10 +287,6 @@ class Markers(object):
marker['lrs_value'] = - \
math.log10(marker['p_value']) * 4.61
filtered_markers.append(marker)
- # else:
- #logger.debug("marker {} NOT in p_values".format(i))
- # self.markers.remove(marker)
- #del self.markers[i]
self.markers = filtered_markers
@@ -290,7 +297,6 @@ class HumanMarkers(Markers):
self.markers = []
for line in marker_data_fh:
splat = line.strip().split()
- #logger.debug("splat:", splat)
if len(specified_markers) > 0:
if splat[1] in specified_markers:
marker = {}
@@ -306,13 +312,11 @@ class HumanMarkers(Markers):
marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
- #logger.debug("markers is: ", pf(self.markers))
-
def add_pvalues(self, p_values):
super(HumanMarkers, self).add_pvalues(p_values)
-class DatasetGroup(object):
+class DatasetGroup:
"""
Each group has multiple datasets; each species has multiple groups.
@@ -365,8 +369,8 @@ class DatasetGroup(object):
def get_markers(self):
def check_plink_gemma():
if flat_file_exists("mapping"):
- MAPPING_PATH = flat_files("mapping")+"/"
- if os.path.isfile(MAPPING_PATH+self.name+".bed"):
+ MAPPING_PATH = flat_files("mapping") + "/"
+ if os.path.isfile(MAPPING_PATH + self.name + ".bed"):
return True
return False
@@ -392,6 +396,15 @@ class DatasetGroup(object):
if maternal and paternal:
self.parlist = [maternal, paternal]
+ def get_study_samplelists(self):
+ study_sample_file = locate_ignore_error(self.name + ".json", 'study_sample_lists')
+ try:
+ f = open(study_sample_file)
+ except:
+ return []
+ study_samples = json.load(f)
+ return study_samples
+
def get_genofiles(self):
jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name)
try:
@@ -412,7 +425,7 @@ class DatasetGroup(object):
else:
logger.debug("Cache not hit")
- genotype_fn = locate_ignore_error(self.name+".geno", 'genotype')
+ genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype')
if genotype_fn:
self.samplelist = get_group_samplelists.get_samplelist(
"geno", genotype_fn)
@@ -421,7 +434,7 @@ class DatasetGroup(object):
if USE_REDIS:
r.set(key, json.dumps(self.samplelist))
- r.expire(key, 60*5)
+ r.expire(key, 60 * 5)
def all_samples_ordered(self):
result = []
@@ -434,7 +447,6 @@ class DatasetGroup(object):
# genotype_1 is Dataset Object without parents and f1
# genotype_2 is Dataset Object with parents and f1 (not for intercross)
- #genotype_1 = reaper.Dataset()
# reaper barfs on unicode filenames, so here we ensure it's a string
if self.genofile:
@@ -520,7 +532,6 @@ def datasets(group_name, this_group=None):
break
if tissue_already_exists:
- #logger.debug("dataset_menu:", dataset_menu[i]['datasets'])
dataset_menu[i]['datasets'].append((dataset, dataset_short))
else:
dataset_menu.append(dict(tissue=tissue_name,
@@ -528,7 +539,7 @@ def datasets(group_name, this_group=None):
if USE_REDIS:
r.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL))
- r.expire(key, 60*5)
+ r.expire(key, 60 * 5)
if this_group != None:
this_group._datasets = dataset_menu
@@ -537,7 +548,7 @@ def datasets(group_name, this_group=None):
return dataset_menu
-class DataSet(object):
+class DataSet:
"""
DataSet class defines a dataset in webqtl, can be either Microarray,
Published phenotype, genotype, or user input dataset(temp)
@@ -553,6 +564,7 @@ class DataSet(object):
self.fullname = None
self.type = None
self.data_scale = None # ZS: For example log2
+ self.accession_id = None
self.setup()
@@ -569,14 +581,16 @@ class DataSet(object):
self.group.get_samplelist()
self.species = species.TheSpecies(self)
- def get_desc(self):
- """Gets overridden later, at least for Temp...used by trait's get_given_name"""
- return None
-
- # Delete this eventually
- @property
- def riset():
- Weve_Renamed_This_As_Group
+ def as_dict(self):
+ return {
+ 'name': self.name,
+ 'shortname': self.shortname,
+ 'fullname': self.fullname,
+ 'type': self.type,
+ 'data_scale': self.data_scale,
+ 'group': self.group.name,
+ 'accession_id': self.accession_id
+ }
def get_accession_id(self):
if self.type == "Publish":
@@ -628,7 +642,7 @@ class DataSet(object):
WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
AND ProbeFreeze.TissueId = Tissue.Id
AND (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s')
- """ % (query_args), "/dataset/"+self.name+".json",
+ """ % (query_args), "/dataset/" + self.name + ".json",
lambda r: (r["id"], r["name"], r["full_name"],
r["short_name"], r["data_scale"], r["tissue"])
)
@@ -651,6 +665,69 @@ class DataSet(object):
"Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
+ def chunk_dataset(self, dataset, n):
+
+ results = {}
+
+ query = """
+ SELECT ProbeSetXRef.DataId,ProbeSet.Name
+ FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze
+ WHERE ProbeSetFreeze.Name = '{}' AND
+ ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id
+ """.format(self.name)
+
+ # should cache this
+
+ traits_name_dict = dict(g.db.execute(query).fetchall())
+
+ for i in range(0, len(dataset), n):
+ matrix = list(dataset[i:i + n])
+ trait_name = traits_name_dict[matrix[0][0]]
+
+ my_values = [value for (trait_name, strain, value) in matrix]
+ results[trait_name] = my_values
+ return results
+
+ def get_probeset_data(self, sample_list=None, trait_ids=None):
+
+ # improvement of get trait data--->>>
+ if sample_list:
+ self.samplelist = sample_list
+
+ else:
+ self.samplelist = self.group.samplelist
+
+ if self.group.parlist != None and self.group.f1list != None:
+ if (self.group.parlist + self.group.f1list) in self.samplelist:
+ self.samplelist += self.group.parlist + self.group.f1list
+
+ query = """
+ SELECT Strain.Name, Strain.Id FROM Strain, Species
+ WHERE Strain.Name IN {}
+ and Strain.SpeciesId=Species.Id
+ and Species.name = '{}'
+ """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
+ results = dict(g.db.execute(query).fetchall())
+ sample_ids = [results[item] for item in self.samplelist]
+
+ sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
+ results.items(), key=lambda item: item[1])]
+
+ query = """SELECT * from ProbeSetData
+ where StrainID in {}
+ and id in (SELECT ProbeSetXRef.DataId
+ FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Name = '{}'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name)
+
+ query_results = list(g.db.execute(query).fetchall())
+ data_results = self.chunk_dataset(query_results, len(sample_ids))
+ self.samplelist = sorted_samplelist
+ self.trait_data = data_results
+
+
def get_trait_data(self, sample_list=None):
if sample_list:
self.samplelist = sample_list
@@ -667,7 +744,6 @@ class DataSet(object):
and Strain.SpeciesId=Species.Id
and Species.name = '{}'
""".format(create_in_clause(self.samplelist), *mescape(self.group.species))
- logger.sql(query)
results = dict(g.db.execute(query).fetchall())
sample_ids = [results[item] for item in self.samplelist]
@@ -735,9 +811,6 @@ class PhenotypeDataSet(DataSet):
DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
def setup(self):
-
- #logger.debug("IS A PHENOTYPEDATASET")
-
# Fields in the database table
self.search_fields = ['Phenotype.Post_publication_description',
'Phenotype.Pre_publication_description',
@@ -841,7 +914,6 @@ class PhenotypeDataSet(DataSet):
Geno.Name = '%s' and
Geno.SpeciesId = Species.Id
""" % (species, this_trait.locus)
- logger.sql(query)
result = g.db.execute(query).fetchone()
if result:
@@ -871,7 +943,6 @@ class PhenotypeDataSet(DataSet):
Order BY
Strain.Name
"""
- logger.sql(query)
results = g.db.execute(query, (trait, self.id)).fetchall()
return results
@@ -938,7 +1009,6 @@ class GenotypeDataSet(DataSet):
Order BY
Strain.Name
"""
- logger.sql(query)
results = g.db.execute(query,
(webqtlDatabaseFunction.retrieve_species_id(self.group.name),
trait, self.name)).fetchall()
@@ -1040,8 +1110,8 @@ class MrnaAssayDataSet(DataSet):
else:
description_display = this_trait.symbol
- if (len(description_display) > 1 and description_display != 'N/A' and
- len(target_string) > 1 and target_string != 'None'):
+ if (len(description_display) > 1 and description_display != 'N/A'
+ and len(target_string) > 1 and target_string != 'None'):
description_display = description_display + '; ' + target_string.strip()
# Save it for the jinja2 template
@@ -1059,9 +1129,6 @@ class MrnaAssayDataSet(DataSet):
ProbeSet.Name = '%s'
""" % (escape(str(this_trait.dataset.id)),
escape(this_trait.name)))
-
- #logger.debug("query is:", pf(query))
- logger.sql(query)
result = g.db.execute(query).fetchone()
mean = result[0] if result else 0
@@ -1081,7 +1148,6 @@ class MrnaAssayDataSet(DataSet):
Geno.Name = '{}' and
Geno.SpeciesId = Species.Id
""".format(species, this_trait.locus)
- logger.sql(query)
result = g.db.execute(query).fetchone()
if result:
@@ -1097,7 +1163,8 @@ class MrnaAssayDataSet(DataSet):
SELECT
Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
FROM
- (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+ (ProbeSetData, ProbeSetFreeze,
+ Strain, ProbeSet, ProbeSetXRef)
left join ProbeSetSE on
(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
left join NStrain on
@@ -1112,9 +1179,7 @@ class MrnaAssayDataSet(DataSet):
Order BY
Strain.Name
""" % (escape(trait), escape(self.name))
- logger.sql(query)
results = g.db.execute(query).fetchall()
- #logger.debug("RETRIEVED RESULTS HERE:", results)
return results
def retrieve_genes(self, column_name):
@@ -1124,7 +1189,6 @@ class MrnaAssayDataSet(DataSet):
where ProbeSetXRef.ProbeSetFreezeId = %s and
ProbeSetXRef.ProbeSetId=ProbeSet.Id;
""" % (column_name, escape(str(self.id)))
- logger.sql(query)
results = g.db.execute(query).fetchall()
return dict(results)
@@ -1155,11 +1219,19 @@ class TempDataSet(DataSet):
def geno_mrna_confidentiality(ob):
dataset_table = ob.type + "Freeze"
- #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
query = '''SELECT Id, Name, FullName, confidentiality,
AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
- logger.sql(query)
+ result = g.db.execute(query)
+
+ (dataset_id,
+ name,
+ full_name,
+ confidential,
+ authorized_users) = result.fetchall()[0]
+
+ if confidential:
+ return True
result = g.db.execute(query)
(dataset_id,
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index f1929518..8f8e2b0a 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -6,12 +6,14 @@ from utility import db_tools
from utility import Bunch
from utility.db_tools import escape
+from gn3.db_utils import database_connector
from utility.logger import getLogger
-logger = getLogger(__name__ )
+logger = getLogger(__name__)
-class MrnaAssayTissueData(object):
+
+class MrnaAssayTissueData:
def __init__(self, gene_symbols=None):
self.gene_symbols = gene_symbols
@@ -20,7 +22,7 @@ class MrnaAssayTissueData(object):
self.data = collections.defaultdict(Bunch)
- query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
+ query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
from (
select Symbol, max(Mean) as maxmean
from TissueProbeSetXRef
@@ -31,29 +33,31 @@ class MrnaAssayTissueData(object):
# Due to the limit size of TissueProbeSetFreezeId table in DB,
# performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
if len(gene_symbols) == 0:
- query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
+ query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
and t.Mean = x.maxmean;
'''
else:
in_clause = db_tools.create_in_clause(gene_symbols)
- #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
+ # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
query += ''' Symbol in {} group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
and t.Mean = x.maxmean;
'''.format(in_clause)
- results = g.db.execute(query).fetchall()
- lower_symbols = []
+ # lower_symbols = []
+ lower_symbols = {}
for gene_symbol in gene_symbols:
+ # lower_symbols[gene_symbol.lower()] = True
if gene_symbol != None:
- lower_symbols.append(gene_symbol.lower())
-
+ lower_symbols[gene_symbol.lower()] = True
+ results = list(g.db.execute(query).fetchall())
for result in results:
symbol = result[0]
- if symbol.lower() in lower_symbols:
+ if symbol is not None and lower_symbols.get(symbol.lower()):
+
symbol = symbol.lower()
self.data[symbol].gene_id = result.GeneId
@@ -64,16 +68,16 @@ class MrnaAssayTissueData(object):
self.data[symbol].probe_target_description = result.Probe_Target_Description
###########################################################################
- #Input: cursor, symbolList (list), dataIdDict(Dict)
- #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
+ # Input: cursor, symbolList (list), dataIdDict(Dict)
+ # output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
# key is symbol, value is one list of expression values of one probeSet;
- #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
- #Attention! All keys are lower case!
+ # function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
+ # Attention! All keys are lower case!
###########################################################################
def get_symbol_values_pairs(self):
id_list = [self.data[symbol].data_id for symbol in self.data]
-
+
symbol_values_dict = {}
if len(id_list) > 0:
@@ -82,11 +86,13 @@ class MrnaAssayTissueData(object):
WHERE TissueProbeSetData.Id IN {} and
TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+
results = g.db.execute(query).fetchall()
for result in results:
if result.Symbol.lower() not in symbol_values_dict:
symbol_values_dict[result.Symbol.lower()] = [result.value]
else:
- symbol_values_dict[result.Symbol.lower()].append(result.value)
+ symbol_values_dict[result.Symbol.lower()].append(
+ result.value)
return symbol_values_dict
diff --git a/wqflask/base/species.py b/wqflask/base/species.py
index 2771d116..f303aabb 100644
--- a/wqflask/base/species.py
+++ b/wqflask/base/species.py
@@ -1,55 +1,66 @@
-import collections
+from collections import OrderedDict
+from dataclasses import dataclass
+from dataclasses import InitVar
+from typing import Optional, Dict
+from flask import g
-from flask import Flask, g
+@dataclass
+class TheSpecies:
+ """Data related to species."""
+ dataset: Optional[Dict] = None
+ species_name: Optional[str] = None
-from utility.logger import getLogger
-logger = getLogger(__name__ )
-
-class TheSpecies(object):
- def __init__(self, dataset=None, species_name=None):
- if species_name != None:
- self.name = species_name
+ def __post_init__(self):
+ if self.species_name is not None:
+ self.name = self.species_name
self.chromosomes = Chromosomes(species=self.name)
else:
- self.dataset = dataset
self.chromosomes = Chromosomes(dataset=self.dataset)
-class IndChromosome(object):
- def __init__(self, name, length):
- self.name = name
- self.length = length
+
+@dataclass
+class IndChromosome:
+ """Data related to IndChromosome"""
+ name: str
+ length: int
@property
def mb_length(self):
- """Chromosome length in megabases"""
+ """Chromosome length in mega-bases"""
return self.length / 1000000
-class Chromosomes(object):
- def __init__(self, dataset=None, species=None):
- self.chromosomes = collections.OrderedDict()
- if species != None:
- query = """
- Select
- Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
- where
- Chr_Length.SpeciesId = Species.SpeciesId AND
- Species.Name = '%s'
- Order by OrderId
- """ % species.capitalize()
- else:
+
+@dataclass
+class Chromosomes:
+ """Data related to a chromosome"""
+ dataset: InitVar[Dict] = None
+ species: Optional[str] = None
+
+ def __post_init__(self, dataset):
+ if self.species is None:
self.dataset = dataset
- query = """
- Select
- Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
- where
- Chr_Length.SpeciesId = InbredSet.SpeciesId AND
- InbredSet.Name = '%s'
- Order by OrderId
- """ % self.dataset.group.name
- logger.sql(query)
+ @property
+ def chromosomes(self):
+ """Lazily fetch the chromosomes"""
+ chromosomes = OrderedDict()
+ if self.species is not None:
+ query = (
+ "SELECT Chr_Length.Name, Chr_Length.OrderId, Length "
+ "FROM Chr_Length, Species WHERE "
+ "Chr_Length.SpeciesId = Species.SpeciesId AND "
+ "Species.Name = "
+ "'%s' ORDER BY OrderId" % self.species.capitalize())
+ else:
+ query = (
+ "SELECT Chr_Length.Name, Chr_Length.OrderId, "
+ "Length FROM Chr_Length, InbredSet WHERE "
+ "Chr_Length.SpeciesId = InbredSet.SpeciesId AND "
+ "InbredSet.Name = "
+ "'%s' ORDER BY OrderId" % self.dataset.group.name)
results = g.db.execute(query).fetchall()
-
for item in results:
- self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
+ chromosomes[item.OrderId] = IndChromosome(
+ item.Name, item.Length)
+ return chromosomes
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 41e2603c..96a09302 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -27,11 +27,13 @@ def create_trait(**kw):
assert bool(kw.get('name')), "Needs trait name"
- if kw.get('dataset_name'):
+ if bool(kw.get('dataset')):
+ dataset = kw.get('dataset')
+ else:
if kw.get('dataset_name') != "Temp":
dataset = create_dataset(kw.get('dataset_name'))
- else:
- dataset = kw.get('dataset')
+ else:
+ dataset = create_dataset("Temp", group_name=kw.get('group_name'))
if dataset.type == 'Publish':
permissions = check_resource_availability(
@@ -51,7 +53,7 @@ def create_trait(**kw):
return None
-class GeneralTrait(object):
+class GeneralTrait:
"""
Trait class defines a trait in webqtl, can be either Microarray,
Published phenotype, genotype, or user input trait
@@ -284,17 +286,19 @@ def get_sample_data():
return None
-def jsonable(trait):
+def jsonable(trait, dataset=None):
"""Return a dict suitable for using as json
Actual turning into json doesn't happen here though"""
- dataset = create_dataset(dataset_name=trait.dataset.name,
- dataset_type=trait.dataset.type,
- group_name=trait.dataset.group.name)
+ if not dataset:
+ dataset = create_dataset(dataset_name=trait.dataset.name,
+ dataset_type=trait.dataset.type,
+ group_name=trait.dataset.group.name)
if dataset.type == "ProbeSet":
return dict(name=trait.name,
+ view=trait.view,
symbol=trait.symbol,
dataset=dataset.name,
dataset_name=dataset.shortname,
@@ -308,103 +312,46 @@ def jsonable(trait):
elif dataset.type == "Publish":
if trait.pubmed_id:
return dict(name=trait.name,
+ view=trait.view,
dataset=dataset.name,
dataset_name=dataset.shortname,
description=trait.description_display,
abbreviation=trait.abbreviation,
authors=trait.authors,
+ pubmed_id=trait.pubmed_id,
pubmed_text=trait.pubmed_text,
pubmed_link=trait.pubmed_link,
+ mean=trait.mean,
lrs_score=trait.LRS_score_repr,
lrs_location=trait.LRS_location_repr,
additive=trait.additive
)
else:
return dict(name=trait.name,
+ view=trait.view,
dataset=dataset.name,
dataset_name=dataset.shortname,
description=trait.description_display,
abbreviation=trait.abbreviation,
authors=trait.authors,
pubmed_text=trait.pubmed_text,
+ mean=trait.mean,
lrs_score=trait.LRS_score_repr,
lrs_location=trait.LRS_location_repr,
additive=trait.additive
)
elif dataset.type == "Geno":
return dict(name=trait.name,
+ view=trait.view,
dataset=dataset.name,
dataset_name=dataset.shortname,
location=trait.location_repr
)
- else:
- return dict()
-
-
-def jsonable_table_row(trait, dataset_name, index):
- """Return a list suitable for json and intended to be displayed in a table
-
- Actual turning into json doesn't happen here though"""
-
- dataset = create_dataset(dataset_name)
-
- if dataset.type == "ProbeSet":
- if trait.mean == "":
- mean = "N/A"
- else:
- mean = "%.3f" % round(float(trait.mean), 2)
- if trait.additive == "":
- additive = "N/A"
- else:
- additive = "%.3f" % round(float(trait.additive), 2)
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
- index,
- '<a href="/show_trait?trait_id=' +
- str(trait.name)+'&dataset='+dataset.name +
- '">'+str(trait.name)+'</a>',
- trait.symbol,
- trait.description_display,
- trait.location_repr,
- mean,
- trait.LRS_score_repr,
- trait.LRS_location_repr,
- additive]
- elif dataset.type == "Publish":
- if trait.additive == "":
- additive = "N/A"
- else:
- additive = "%.2f" % round(float(trait.additive), 2)
- if trait.pubmed_id:
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
- index,
- '<a href="/show_trait?trait_id=' +
- str(trait.name)+'&dataset='+dataset.name +
- '">'+str(trait.name)+'</a>',
- trait.description_display,
- trait.authors,
- '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>',
- trait.LRS_score_repr,
- trait.LRS_location_repr,
- additive]
- else:
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
- index,
- '<a href="/show_trait?trait_id=' +
- str(trait.name)+'&dataset='+dataset.name +
- '">'+str(trait.name)+'</a>',
- trait.description_display,
- trait.authors,
- trait.pubmed_text,
- trait.LRS_score_repr,
- trait.LRS_location_repr,
- additive]
- elif dataset.type == "Geno":
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
- index,
- '<a href="/show_trait?trait_id=' +
- str(trait.name)+'&dataset='+dataset.name +
- '">'+str(trait.name)+'</a>',
- trait.location_repr]
+ elif dataset.name == "Temp":
+ return dict(name=trait.name,
+ view=trait.view,
+ dataset="Temp",
+ dataset_name="Temp")
else:
return dict()
@@ -516,10 +463,11 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
# If the dataset is confidential and the user has access to confidential
# phenotype traits, then display the pre-publication description instead
# of the post-publication description
- trait.description_display = ""
+ trait.description_display = "N/A"
if not trait.pubmed_id:
trait.abbreviation = trait.pre_publication_abbreviation
- trait.description_display = trait.pre_publication_description
+ if trait.pre_publication_description:
+ trait.description_display = trait.pre_publication_description
else:
trait.abbreviation = trait.post_publication_abbreviation
if description:
@@ -542,9 +490,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
else:
description_display = trait.symbol
- if (str(description_display or "") != "" and
- description_display != 'N/A' and
- str(target_string or "") != "" and target_string != 'None'):
+ if (str(description_display or "") != ""
+ and description_display != 'N/A'
+ and str(target_string or "") != "" and target_string != 'None'):
description_display = description_display + '; ' + target_string.strip()
# Save it for the jinja2 template
@@ -638,6 +586,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
if str(trait.lrs or "") != "":
trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs
else:
- raise KeyError(repr(trait.name) +
- ' information is not found in the database.')
+ raise KeyError(repr(trait.name)
+ + ' information is not found in the database.')
return trait
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index aa55470f..25b6cb8a 100644
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -28,17 +28,20 @@ import utility.tools
utility.tools.show_settings()
+
class webqtlCaseData:
"""one case data in one trait"""
def __init__(self, name, value=None, variance=None, num_cases=None, name2=None):
self.name = name
- self.name2 = name2 # Other name (for traits like BXD65a)
+ # Other name (for traits like BXD65a)
+ self.name2 = name2
self.value = value # Trait Value
self.variance = variance # Trait Variance
self.num_cases = num_cases # Number of individuals/cases
self.extra_attributes = None
- self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word)
+ # Set a sane default (can't be just "id" cause that's a reserved word)
+ self.this_id = None
self.outlier = None # Not set to True/False until later
def __repr__(self):
@@ -78,4 +81,4 @@ class webqtlCaseData:
def display_num_cases(self):
if self.num_cases is not None:
return "%s" % self.num_cases
- return "x" \ No newline at end of file
+ return "x"
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index aee8616a..39947158 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -1,4 +1,4 @@
-#########################################'
+# '
# Environment Variables - public
#
# Note: much of this needs to handled by the settings/environment
@@ -10,35 +10,35 @@
from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR
-#Debug Level
-#1 for debug, mod python will reload import each time
+# Debug Level
+# 1 for debug, mod python will reload import each time
DEBUG = 1
-#USER privilege
-USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4}
+# USER privilege
+USERDICT = {'guest': 1, 'user': 2, 'admin': 3, 'root': 4}
-#Set privileges
+# Set privileges
SUPER_PRIVILEGES = {'data': 'edit', 'metadata': 'edit', 'admin': 'edit-admins'}
DEFAULT_PRIVILEGES = {'data': 'view', 'metadata': 'view', 'admin': 'not-admin'}
-#minimum number of informative strains
+# minimum number of informative strains
KMININFORMATIVE = 5
-#Daily download limit from one IP
+# Daily download limit from one IP
DAILYMAXIMUM = 1000
-#maximum LRS value
+# maximum LRS value
MAXLRS = 460.0
-#MINIMUM Database public value
+# MINIMUM Database public value
PUBLICTHRESH = 0
-#EXTERNAL LINK ADDRESSES
+# EXTERNAL LINK ADDRESSES
PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
-GENOMEBROWSER_URL="https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s"
+GENOMEBROWSER_URL = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s"
NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s"
GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
@@ -56,26 +56,28 @@ GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s"
ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s"
EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s"
WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s"
-ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s"
+ENSEMBLETRANSCRIPT_URL = "http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s"
DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s'
PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s"
OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s"
UNIPROT_URL = "https://www.uniprot.org/uniprot/%s"
RGD_URL = "https://rgd.mcw.edu/rgdweb/elasticResults.html?term=%s&category=Gene&species=%s"
PHENOGEN_URL = "https://phenogen.org/gene.jsp?speciesCB=Rn&auto=Y&geneTxt=%s&genomeVer=rn6&section=geneEQTL"
+RRID_MOUSE_URL = "https://www.jax.org/strain/%s"
+RRID_RAT_URL = "https://rgd.mcw.edu/rgdweb/report/strain/main.html?id=%s"
# Temporary storage (note that this TMPDIR can be set as an
# environment variable - use utility.tools.TEMPDIR when you
# want to reach this base dir
assert_writable_dir(TEMPDIR)
-TMPDIR = mk_dir(TEMPDIR+'/gn2/')
+TMPDIR = mk_dir(TEMPDIR + '/gn2/')
assert_writable_dir(TMPDIR)
-CACHEDIR = mk_dir(TMPDIR+'/cache/')
+CACHEDIR = mk_dir(TMPDIR + '/cache/')
# We can no longer write into the git tree:
-GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'generated/')
-GENERATED_TEXT_DIR = mk_dir(TMPDIR+'generated_text/')
+GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/')
+GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/')
# Make sure we have permissions to access these
assert_writable_dir(CACHEDIR)
@@ -83,12 +85,12 @@ assert_writable_dir(GENERATED_IMAGE_DIR)
assert_writable_dir(GENERATED_TEXT_DIR)
# Flat file directories
-GENODIR = flat_files('genotype')+'/'
+GENODIR = flat_files('genotype') + '/'
assert_dir(GENODIR)
# assert_dir(GENODIR+'bimbam') # for gemma
# JSON genotypes are OBSOLETE
-JSON_GENODIR = flat_files('genotype/json')+'/'
+JSON_GENODIR = flat_files('genotype/json') + '/'
if not valid_path(JSON_GENODIR):
# fall back on old location (move the dir, FIXME)
JSON_GENODIR = flat_files('json')
@@ -96,4 +98,4 @@ if not valid_path(JSON_GENODIR):
# Are we using the following...?
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
-CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
+CGIDIR = '/webqtl/' # XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'