diff options
Diffstat (limited to 'wqflask/maintenance/gen_select_dataset.py')
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 296 |
1 files changed, 0 insertions, 296 deletions
"""Script that generates the data for the main dropdown menus on the home page

Writes out data as /static/new/javascript/dataset_menu_structure.json
It needs to be run manually when database has been changed. Run it as

  ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py

"""


# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams
# at rwilliams@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)

import sys

# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
sys.path.insert(0, './')

try:
    import simplejson as json
except ImportError:  # simplejson is optional; the stdlib module is API-compatible here
    import json
import urllib.parse


from pprint import pformat as pf

# Legacy fallback cursor. The query helpers below historically read a
# module-level ``Cursor`` that nothing in this file ever assigned, so every
# call raised NameError. The helpers now take the cursor as an optional
# argument; this global is only consulted when no cursor is passed.
Cursor = None


def _resolve_cursor(cursor):
    """Return `cursor`, or the module-level ``Cursor`` when `cursor` is None.

    Raises RuntimeError when neither is available.
    """
    if cursor is not None:
        return cursor
    if Cursor is None:
        raise RuntimeError(
            "No database cursor available: pass `cursor=` explicitly "
            "or set the module-level `Cursor` first")
    return Cursor


def get_species(cursor):
    """Build species list

    Returns a list of (Name, MenuName) rows from the Species table,
    ordered by OrderId.
    """
    cursor.execute("select Name, MenuName from Species order by OrderId")
    return list(cursor.fetchall())


def get_groups(cursor, species):
    """Build groups list

    `species` is an iterable of (name, full_name) pairs as returned by
    get_species(). Returns a dict mapping each species name to a list of
    (InbredSet.Name, InbredSet.FullName) rows.
    """
    groups = {}
    for species_name, _species_full_name in species:
        # Parameterized so that names containing quotes cannot break the query
        cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet,
                       Species,
                       ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = %s
                       and InbredSet.SpeciesId = Species.Id and
                       (PublishFreeze.InbredSetId = InbredSet.Id
                       or GenoFreeze.InbredSetId = InbredSet.Id
                       or ProbeFreeze.InbredSetId = InbredSet.Id)
                       group by InbredSet.Name
                       order by InbredSet.FullName""", (species_name,))
        groups[species_name] = list(cursor.fetchall())
    return groups


def get_types(groups, cursor=None):
    """Build types list

    Returns {species: {group_name: [(type, type), ...]}} where the list holds
    "Phenotypes" and/or "Genotypes" (when the matching freeze exists) followed
    by the tissues found by build_types().

    NOTE: `groups` is modified in place. A group with no types at all is left
    out of the result and also removed from ``groups[species]`` (which is then
    rebound to a tuple).
    """
    cursor = _resolve_cursor(cursor)
    types = {}
    for species, group_list in list(groups.items()):
        types[species] = {}
        # group_list is the original sequence; rebinding groups[species]
        # below does not disturb this iteration.
        for group_name, _group_full_name in group_list:
            type_list = []
            if phenotypes_exist(group_name, cursor):
                type_list.append(("Phenotypes", "Phenotypes"))
            if genotypes_exist(group_name, cursor):
                type_list.append(("Genotypes", "Genotypes"))
            type_list += build_types(species, group_name, cursor)

            if type_list:
                types[species][group_name] = type_list
            else:
                groups[species] = tuple(
                    group for group in groups[species]
                    if group[0] != group_name)
    return types


def phenotypes_exist(group_name, cursor=None):
    """Return True if a PublishFreeze named ``<group_name>Publish`` exists"""
    cursor = _resolve_cursor(cursor)
    cursor.execute("""select Name from PublishFreeze
                      where PublishFreeze.Name = %s""",
                   (group_name + "Publish",))
    return cursor.fetchone() is not None


def genotypes_exist(group_name, cursor=None):
    """Return True if a GenoFreeze named ``<group_name>Geno`` exists"""
    cursor = _resolve_cursor(cursor)
    cursor.execute("""select Name from GenoFreeze
                      where GenoFreeze.Name = %s""",
                   (group_name + "Geno",))
    return cursor.fetchone() is not None


def build_types(species, group, cursor=None):
    """Fetches tissues

    Gets the tissues with data for this species/group
    (all types except phenotype/genotype are tissues)

    Returns a list of (tissue_name, tissue_name) pairs, keeping only tissues
    for which build_datasets() finds at least one dataset.
    """
    cursor = _resolve_cursor(cursor)
    cursor.execute("""select distinct Tissue.Name
                       from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
                       where Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                       InbredSet.Name = %s and
                       ProbeFreeze.TissueId = Tissue.Id and
                       ProbeFreeze.InbredSetId = InbredSet.Id and
                       ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
                       ProbeSetFreeze.public > 0 and
                       ProbeSetFreeze.confidentiality < 1
                       order by Tissue.Name""", (species, group))

    # Read all rows before looping: build_datasets() reuses the same cursor.
    tissue_rows = cursor.fetchall()

    results = []
    for row in tissue_rows:
        if not row:
            continue
        tissue = row[0]
        if build_datasets(species, group, tissue, cursor):
            results.append((tissue, tissue))
    return results


def get_datasets(types, cursor=None):
    """Build datasets list

    `types` is the structure returned by get_types(). Returns
    {species: {group: {type_name: datasets}}}, omitting any type for which
    build_datasets() returns nothing.
    """
    cursor = _resolve_cursor(cursor)
    datasets = {}
    for species, group_dict in types.items():
        datasets[species] = {}
        for group, type_list in group_dict.items():
            datasets[species][group] = {}
            for type_name, _type_label in type_list:
                these_datasets = build_datasets(
                    species, group, type_name, cursor)
                if these_datasets:
                    datasets[species][group][type_name] = these_datasets
    return datasets


def build_datasets(species, group, type_name, cursor=None):
    """Gets dataset names from database

    Returns a list of (id, value, text) entries: tuples for "Phenotypes" and
    "Genotypes", and lists of stringified columns for any other type, which
    is treated as a tissue name (mRNA expression/ProbeSet).
    """
    cursor = _resolve_cursor(cursor)
    datasets = []

    if type_name == "Phenotypes":
        cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where
                    InbredSet.Name = %s and
                    PublishFreeze.InbredSetId = InbredSet.Id and
                    InfoFiles.InfoPageName = PublishFreeze.Name order by
                    PublishFreeze.CreateTime asc""", (group,))

        results = cursor.fetchall()
        if results:
            for result in results:
                dataset_id = str(result[0])
                dataset_value = str(result[1])
                if group == 'MDP':
                    dataset_text = "Mouse Phenome Database"
                else:
                    dataset_text = str(result[2])
                datasets.append((dataset_id, dataset_value, dataset_text))
        else:
            # No InfoFiles entry: fall back to the conventional names
            datasets.append(
                ("None", "%sPublish" % group, "%s Phenotypes" % group))

    elif type_name == "Genotypes":
        cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
                    InbredSet.Name = %s and
                    GenoFreeze.InbredSetId = InbredSet.Id and
                    InfoFiles.InfoPageName = GenoFreeze.ShortName and
                    GenoFreeze.public > 0 and
                    GenoFreeze.confidentiality < 1 order by
                    GenoFreeze.CreateTime desc""", (group,))

        result = cursor.fetchone()
        dataset_id = str(result[0]) if result is not None else "None"
        datasets.append(
            (dataset_id, "%sGeno" % group, "%s Genotypes" % group))

    else:  # for mRNA expression/ProbeSet
        cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
                    ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
                    Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                    InbredSet.Name = %s and
                    ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = %s and
                    ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and
                    ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by
                    ProbeSetFreeze.CreateTime desc""",
                       (species, group, type_name))

        datasets = [[str(info) for info in dataset_info]
                    for dataset_info in cursor.fetchall()]

    return datasets


def main(cursor):
    """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""

    species = get_species(cursor)
    groups = get_groups(cursor, species)
    # get_types() prunes empty groups from `groups` in place
    types = get_types(groups, cursor)
    datasets = get_datasets(types, cursor)

    data = dict(species=species,
                groups=groups,
                types=types,
                datasets=datasets,
                )

    # Relative path: the script is expected to be run from the repository root
    output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json"""

    with open(output_file, 'w') as fh:
        json.dump(data, fh, indent="   ", sort_keys=True)


def _test_it(cursor=None):
    """Used for internal testing only

    Returns the (types, datasets) fetched for Mouse/BXD.
    """
    types = build_types("Mouse", "BXD", cursor)
    datasets = build_datasets("Mouse", "BXD", "Hippocampus", cursor)
    return types, datasets


if __name__ == '__main__':
    # Project imports are only needed to open the connection, so they live
    # here; the query helpers above stay importable without the app config.
    # NEW: import app to avoid a circular dependency on utility.tools
    from wqflask import app

    from utility.tools import get_setting

    from wqflask.database import database_connection

    with database_connection(get_setting("SQL_URI")) as conn:
        with conn.cursor() as cursor:
            main(cursor)