aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base/data_set.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base/data_set.py')
-rw-r--r--wqflask/base/data_set.py113
1 files changed, 87 insertions, 26 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ebf3f021..1457ba8d 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -56,7 +56,7 @@ from pprint import pformat as pf
from db.gn_server import menu_main
from db.call import fetchall,fetchone,fetch1
-from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists
+from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL
from utility.logger import getLogger
logger = getLogger(__name__ )
@@ -64,10 +64,9 @@ logger = getLogger(__name__ )
# Each subclass will add to this
DS_NAME_MAP = {}
-def create_dataset(dataset_name, rebuild=True, dataset_type = None, get_samplelist = True, group_name = None):
+def create_dataset(dataset_name, dataset_type = None, get_samplelist = True, group_name = None):
if not dataset_type:
dataset_type = Dataset_Getter(dataset_name)
- logger.debug("dataset_type", dataset_type)
dataset_ob = DS_NAME_MAP[dataset_type]
dataset_class = globals()[dataset_ob]
@@ -78,7 +77,7 @@ def create_dataset(dataset_name, rebuild=True, dataset_type = None, get_sampleli
class Dataset_Types(object):
- def __init__(self, rebuild=False):
+ def __init__(self):
"""Create a dictionary of samples where the value is set to Geno,
Publish or ProbeSet. E.g.
@@ -94,33 +93,95 @@ Publish or ProbeSet. E.g.
"""
self.datasets = {}
- if rebuild: #ZS: May make this the only option
- data = json.loads(requests.get("http://gn2.genenetwork.org/api/v_pre1/gen_dropdown").content)
- logger.debug("THE DATA:", data)
- #data = gen_menu.gen_dropdown_json()
- else:
- file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
- with open(file_name, 'r') as fh:
- data = json.load(fh)
-
- for species in data['datasets']:
- for group in data['datasets'][species]:
- for dataset_type in data['datasets'][species][group]:
- for dataset in data['datasets'][species][group][dataset_type]:
- short_dataset_name = dataset[1]
- if dataset_type == "Phenotypes":
- new_type = "Publish"
- elif dataset_type == "Genotypes":
- new_type = "Geno"
- else:
- new_type = "ProbeSet"
- self.datasets[short_dataset_name] = new_type
+
+ data = Redis.get("dataset_structure")
+ if data:
+ self.datasets = json.loads(data)
+ else: #ZS: I don't think this should ever run unless Redis is emptied
+ try:
+ data = json.loads(requests.get(GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout = 5).content)
+ for species in data['datasets']:
+ for group in data['datasets'][species]:
+ for dataset_type in data['datasets'][species][group]:
+ for dataset in data['datasets'][species][group][dataset_type]:
+ short_dataset_name = dataset[1]
+ if dataset_type == "Phenotypes":
+ new_type = "Publish"
+ elif dataset_type == "Genotypes":
+ new_type = "Geno"
+ else:
+ new_type = "ProbeSet"
+ self.datasets[short_dataset_name] = new_type
+ except:
+ pass
+
+ Redis.set("dataset_structure", json.dumps(self.datasets))
# Set LOG_LEVEL_DEBUG=5 to see the following:
logger.debugf(5, "datasets",self.datasets)
def __call__(self, name):
- return self.datasets[name]
+ if name not in self.datasets:
+ mrna_expr_query = """
+ SELECT
+ ProbeSetFreeze.Id
+ FROM
+ ProbeSetFreeze
+ WHERE
+ ProbeSetFreeze.Name = "{0}"
+ """.format(name)
+
+ results = g.db.execute(mrna_expr_query).fetchall()
+ if len(results):
+ self.datasets[name] = "ProbeSet"
+ Redis.set("dataset_structure", json.dumps(self.datasets))
+ return self.datasets[name]
+
+ group_name = name.replace("Publish", "")
+
+ pheno_query = """SELECT InfoFiles.GN_AccesionId
+ FROM InfoFiles, PublishFreeze, InbredSet
+ WHERE InbredSet.Name = '{0}' AND
+ PublishFreeze.InbredSetId = InbredSet.Id AND
+ InfoFiles.InfoPageName = PublishFreeze.Name""".format(group_name)
+
+ results = g.db.execute(pheno_query).fetchall()
+ if len(results):
+ self.datasets[name] = "Publish"
+ Redis.set("dataset_structure", json.dumps(self.datasets))
+ return self.datasets[name]
+
+ #ZS: For when there isn't an InfoFiles ID; not sure if this and the preceding query are both necessary
+ other_pheno_query = """SELECT PublishFreeze.Name
+ FROM PublishFreeze, InbredSet
+ WHERE InbredSet.Name = '{}' AND
+ PublishFreeze.InbredSetId = InbredSet.Id""".format(group_name)
+
+ results = g.db.execute(other_pheno_query).fetchall()
+ if len(results):
+ self.datasets[name] = "Publish"
+ Redis.set("dataset_structure", json.dumps(self.datasets))
+ return self.datasets[name]
+
+ geno_query = """
+ SELECT
+ GenoFreeze.Id
+ FROM
+ GenoFreeze
+ WHERE
+ GenoFreeze.Name = "{0}"
+ """.format(name)
+
+ results = g.db.execute(geno_query).fetchall()
+ if len(results):
+ self.datasets[name] = "Geno"
+ Redis.set("dataset_structure", json.dumps(self.datasets))
+ return self.datasets[name]
+
+ #ZS: It shouldn't ever reach this
+ return None
+ else:
+ return self.datasets[name]
# Do the intensive work at startup one time only
Dataset_Getter = Dataset_Types()