"""Script that generates the data for the main dropdown menus on the home page Writes out data as /static/new/javascript/dataset_menu_structure.json It needs to be run manually when database has been changed. Run it as ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py """ # Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License # as published by the Free Software Foundation, either version 3 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # See the GNU Affero General Public License for more details. # # This program is available from Source Forge: at GeneNetwork Project # (sourceforge.net/projects/genenetwork/). # # Contact Drs. Robert W. Williams # at rwilliams@uthsc.edu # # # # This module is used by GeneNetwork project (www.genenetwork.org) import sys # NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead sys.path.insert(0, './') # NEW: import app to avoid a circular dependency on utility.tools from gn2.wqflask import app from gn2.utility.tools import get_setting import simplejson as json import urllib.parse from pprint import pformat as pf from gn2.wqflask.database import database_connection def get_species(cursor): """Build species list""" #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") cursor.execute("select Name, MenuName from Species order by OrderId") species = list(cursor.fetchall()) return species def get_groups(cursor, species): """Build groups list""" groups = {} for species_name, _species_full_name in species: cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet, Species, ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s' and InbredSet.SpeciesId = Species.Id and (PublishFreeze.InbredSetId = InbredSet.Id or GenoFreeze.InbredSetId = InbredSet.Id or ProbeFreeze.InbredSetId = InbredSet.Id) group by InbredSet.Name order by InbredSet.FullName""" % species_name) results = cursor.fetchall() groups[species_name] = list(results) return groups def get_types(groups): """Build types list""" types = {} #print("Groups: ", pf(groups)) for species, group_dict in list(groups.items()): types[species] = {} for group_name, _group_full_name in group_dict: # make group an alias to shorten the code #types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")] if phenotypes_exist(group_name): types[species][group_name] = [("Phenotypes", "Phenotypes")] if genotypes_exist(group_name): if group_name in types[species]: types[species][group_name] += [("Genotypes", "Genotypes")] else: types[species][group_name] = [("Genotypes", "Genotypes")] if group_name in types[species]: types_list = build_types(species, group_name) if len(types_list) > 0: types[species][group_name] += types_list else: if not phenotypes_exist(group_name) and not genotypes_exist(group_name): types[species].pop(group_name, None) groups[species] = tuple( group for group in groups[species] if group[0] != group_name) else: # ZS: This whole else statement might be unnecessary, need to check types_list = build_types(species, group_name) if len(types_list) > 0: types[species][group_name] = types_list else: types[species].pop(group_name, None) groups[species] = tuple( group for group in groups[species] if group[0] != group_name) return types def phenotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from PublishFreeze where PublishFreeze.Name = '%s'""" % (group_name + "Publish")) results = Cursor.fetchone() #print("RESULTS:", results) if results != None: return True else: return False def genotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from GenoFreeze where GenoFreeze.Name = '%s'""" % (group_name + "Geno")) results = Cursor.fetchone() #print("RESULTS:", results) if results != None: return True else: return False def build_types(species, group): """Fetches tissues Gets the tissues with data for this species/group (all types except phenotype/genotype are tissues) """ Cursor.execute("""select distinct Tissue.Name from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and InbredSet.Name = '%s' and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and ProbeSetFreeze.public > 0 and ProbeSetFreeze.confidentiality < 1 order by Tissue.Name""" % (species, group)) results = [] for result in Cursor.fetchall(): if len(result): these_datasets = build_datasets(species, group, result[0]) if len(these_datasets) > 0: results.append((result[0], result[0])) return results def get_datasets(types): """Build datasets list""" datasets = {} for species, group_dict in list(types.items()): datasets[species] = {} for group, type_list in list(group_dict.items()): datasets[species][group] = {} for type_name in type_list: these_datasets = build_datasets(species, group, type_name[0]) if len(these_datasets) > 0: datasets[species][group][type_name[0]] = these_datasets return datasets def build_datasets(species, group, type_name): """Gets dataset names from database""" dataset_text = dataset_value = None datasets = [] if type_name == "Phenotypes": Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where InbredSet.Name = '%s' and PublishFreeze.InbredSetId = InbredSet.Id and InfoFiles.InfoPageName = PublishFreeze.Name order by PublishFreeze.CreateTime asc""" % group) results = Cursor.fetchall() if len(results) > 0: for result in results: print(result) dataset_id = str(result[0]) dataset_value = str(result[1]) if group == 'MDP': dataset_text = "Mouse Phenome Database" else: #dataset_text = "%s Phenotypes" % group dataset_text = str(result[2]) datasets.append((dataset_id, dataset_value, dataset_text)) else: dataset_id = "None" dataset_value = "%sPublish" % group dataset_text = "%s Phenotypes" % group datasets.append((dataset_id, dataset_value, dataset_text)) elif type_name == "Genotypes": Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where InbredSet.Name = '%s' and GenoFreeze.InbredSetId = InbredSet.Id and InfoFiles.InfoPageName = GenoFreeze.ShortName and GenoFreeze.public > 0 and GenoFreeze.confidentiality < 1 order by GenoFreeze.CreateTime desc""" % group) results = Cursor.fetchone() if results != None: dataset_id = str(results[0]) else: dataset_id = "None" dataset_value = "%sGeno" % group dataset_text = "%s Genotypes" % group datasets.append((dataset_id, dataset_value, dataset_text)) else: # for mRNA expression/ProbeSet Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and InbredSet.Name = '%s' and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by ProbeSetFreeze.CreateTime desc""" % (species, group, type_name)) dataset_results = Cursor.fetchall() datasets = [] for dataset_info in dataset_results: this_dataset_info = [] for info in dataset_info: this_dataset_info.append(str(info)) datasets.append(this_dataset_info) return datasets def main(cursor): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" species = get_species(cursor) groups = get_groups(cursor, species) types = get_types(groups) datasets = get_datasets(types) #species.append(('All Species', 'All Species')) #groups['All Species'] = [('All Groups', 'All Groups')] #types['All Species'] = {} #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')] #datasets['All Species'] = {} #datasets['All Species']['All Groups'] = {} #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')] data = dict(species=species, groups=groups, types=types, datasets=datasets, ) #print("data:", data) output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json""" with open(output_file, 'w') as fh: json.dump(data, fh, indent=" ", sort_keys=True) #print("\nWrote file to:", output_file) def _test_it(): """Used for internal testing only""" types = build_types("Mouse", "BXD") #print("build_types:", pf(types)) datasets = build_datasets("Mouse", "BXD", "Hippocampus") #print("build_datasets:", pf(datasets)) if __name__ == '__main__': with database_connection(get_setting("SQL_URI")) as conn: with conn.cursor() as cursor: main(cursor)