diff options
Diffstat (limited to 'wqflask/base/data_set.py')
-rw-r--r-- | wqflask/base/data_set.py | 114 |
1 files changed, 88 insertions, 26 deletions
class Dataset_Types(object):
    """Callable lookup table from dataset short name to dataset type.

    The type is one of the strings "Geno", "Publish" or "ProbeSet".  The
    mapping lives in ``self.datasets`` and is mirrored, JSON-encoded, under
    the Redis key "dataset_structure".
    """

    def __init__(self):
        """Load the name -> type mapping from Redis, or rebuild it.

        When Redis holds no "dataset_structure" entry, the mapping is rebuilt
        from the ``/api/v_pre1/gen_dropdown`` endpoint below GN2_BASE_URL.
        A failed rebuild is best-effort: the mapping stays empty and
        individual names are resolved on demand by ``__call__``.
        """
        self.datasets = {}

        data = Redis.get("dataset_structure")
        if data:
            self.datasets = json.loads(data)
        else:  #ZS: I don't think this should ever run unless Redis is emptied
            try:
                response = requests.get(
                    GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5)
                self.datasets = self._types_from_menu(
                    json.loads(response.content))
            except Exception as exc:
                # Best effort only: start-up must not fail because the menu
                # endpoint is unreachable or returned something unexpected.
                logger.debug("Dataset_Types: could not load dataset menu", exc)
            else:
                # Cache only a successful rebuild.  Caching the empty mapping
                # after a failure would store "{}", which is truthy and would
                # suppress every later rebuild attempt.
                Redis.set("dataset_structure", json.dumps(self.datasets))

        # Set LOG_LEVEL_DEBUG=5 to see the following:
        logger.debugf(5, "datasets", self.datasets)

    @staticmethod
    def _types_from_menu(data):
        """Flatten the dropdown menu structure into {short_name: type}.

        ``data['datasets']`` is nested as species -> group -> menu type ->
        list of dataset entries; element 1 of each entry is the dataset's
        short name.  The menu type "Phenotypes" maps to "Publish",
        "Genotypes" to "Geno", and anything else to "ProbeSet".
        """
        menu_type_to_ds_type = {"Phenotypes": "Publish", "Genotypes": "Geno"}
        datasets = {}
        for groups in data['datasets'].values():
            for menu_types in groups.values():
                for menu_type, entries in menu_types.items():
                    ds_type = menu_type_to_ds_type.get(menu_type, "ProbeSet")
                    for entry in entries:
                        datasets[entry[1]] = ds_type
        return datasets

    @staticmethod
    def _type_queries(name):
        """Return the ordered (dataset type, SQL) probes used for ``name``.

        A probe that yields at least one row identifies the dataset as that
        type; the probes are meant to be tried in the order returned.
        """
        # NOTE(review): ``name`` is interpolated straight into the SQL text.
        # If it can come from a request this is an injection risk; switch to
        # bound parameters once the g.db API has been confirmed.
        group_name = name.replace("Publish", "")

        mrna_expr_query = """
            SELECT
                ProbeSetFreeze.Id
            FROM
                ProbeSetFreeze
            WHERE
                ProbeSetFreeze.Name = "{0}"
            """.format(name)

        pheno_query = """SELECT InfoFiles.GN_AccesionId
                         FROM InfoFiles, PublishFreeze, InbredSet
                         WHERE InbredSet.Name = '{0}' AND
                               PublishFreeze.InbredSetId = InbredSet.Id AND
                               InfoFiles.InfoPageName = PublishFreeze.Name""".format(group_name)

        #ZS: For when there isn't an InfoFiles ID; not sure if this and the preceding query are both necessary
        other_pheno_query = """SELECT PublishFreeze.Name
                               FROM PublishFreeze, InbredSet
                               WHERE InbredSet.Name = '{}' AND
                                     PublishFreeze.InbredSetId = InbredSet.Id""".format(group_name)

        # NOTE(review): the column is kept as written (GenoFreezeId); confirm
        # against the schema that it should not be GenoFreeze.Id.
        geno_query = """
            SELECT
                GenoFreezeId
            FROM
                GenoFreeze
            WHERE
                GenoFreeze.Name = "{0}"
            """.format(name)

        return [
            ("ProbeSet", mrna_expr_query),
            ("Publish", pheno_query),
            ("Publish", other_pheno_query),
            ("Geno", geno_query),
        ]

    def __call__(self, name):
        """Return the dataset type for ``name``, or None if it is unknown.

        Names missing from the in-memory mapping are looked up in the
        database; a hit is added to the mapping and written back to Redis.
        """
        if name in self.datasets:
            return self.datasets[name]

        for dataset_type, query in self._type_queries(name):
            if g.db.execute(query).fetchall():
                self.datasets[name] = dataset_type
                Redis.set("dataset_structure", json.dumps(self.datasets))
                return dataset_type

        #ZS: It shouldn't ever reach this
        return None