diff options
author | Alexander_Kabui | 2024-01-02 13:21:07 +0300 |
---|---|---|
committer | Alexander_Kabui | 2024-01-02 13:21:07 +0300 |
commit | 70c4201b332e0e2c0d958428086512f291469b87 (patch) | |
tree | aea4fac8782c110fc233c589c3f0f7bd34bada6c /gn2/maintenance/gen_select_dataset.py | |
parent | 5092eb42f062b1695c4e39619f0bd74a876cfac2 (diff) | |
parent | 965ce5114d585624d5edb082c710b83d83a3be40 (diff) | |
download | genenetwork2-70c4201b332e0e2c0d958428086512f291469b87.tar.gz |
merge changes
Diffstat (limited to 'gn2/maintenance/gen_select_dataset.py')
-rw-r--r-- | gn2/maintenance/gen_select_dataset.py | 296 |
1 files changed, 296 insertions, 0 deletions
"""Script that generates the data for the main dropdown menus on the home page

Writes out data as /static/new/javascript/dataset_menu_structure.json
It needs to be run manually when database has been changed. Run it as

  ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py

"""


# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams
# at rwilliams@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)

import sys

# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
sys.path.insert(0, './')
# NEW: import app to avoid a circular dependency on utility.tools
from gn2.wqflask import app

from gn2.utility.tools import get_setting

import simplejson as json
import urllib.parse


from pprint import pformat as pf

from gn2.wqflask.database import database_connection


# Fallback cursor used by the helpers below when no cursor is passed
# explicitly.  main() always passes its cursor; this global only exists so
# that code which assigns ``Cursor`` on this module keeps working.
Cursor = None


def _resolve_cursor(cursor=None):
    """Return `cursor`, or the module-level `Cursor` when `cursor` is None.

    Raises:
        RuntimeError: if neither an explicit cursor nor the module-level
            `Cursor` is available.
    """
    if cursor is not None:
        return cursor
    if Cursor is None:
        raise RuntimeError(
            "No database cursor available: pass `cursor` explicitly "
            "or set the module-level `Cursor`")
    return Cursor


def get_species(cursor):
    """Build species list

    Returns a list of (Name, MenuName) rows from the Species table,
    ordered by OrderId.
    """
    cursor.execute("select Name, MenuName from Species order by OrderId")
    return list(cursor.fetchall())


def get_groups(cursor, species):
    """Build groups list

    `species` is an iterable of (name, full_name) pairs as returned by
    get_species().  Returns a dict mapping each species name to a list of
    (InbredSet.Name, InbredSet.FullName) rows.
    """
    groups = {}
    for species_name, _species_full_name in species:
        # The value is passed as a query parameter rather than interpolated
        # into the SQL text, so names containing quotes cannot break the query.
        cursor.execute(
            """select InbredSet.Name, InbredSet.FullName
               from InbredSet, Species, ProbeFreeze, GenoFreeze, PublishFreeze
               where Species.Name = %s
                 and InbredSet.SpeciesId = Species.Id
                 and (PublishFreeze.InbredSetId = InbredSet.Id
                      or GenoFreeze.InbredSetId = InbredSet.Id
                      or ProbeFreeze.InbredSetId = InbredSet.Id)
               group by InbredSet.Name
               order by InbredSet.FullName""",
            (species_name,))
        groups[species_name] = list(cursor.fetchall())
    return groups


def get_types(groups, cursor=None):
    """Build types list

    For every species/group in `groups`, collects the available dataset
    types: "Phenotypes" and "Genotypes" (when the matching freeze exists)
    followed by the tissues returned by build_types().

    Groups that end up with no type at all are left out of the result and
    are also removed from `groups[species]` (which is rebound to a tuple),
    so `groups` is modified in place.

    Returns a dict of the form {species: {group_name: [(type, type), ...]}}.
    """
    cursor = _resolve_cursor(cursor)
    types = {}
    for species, group_list in list(groups.items()):
        types[species] = {}
        for group_name, _group_full_name in group_list:
            type_entries = []
            if phenotypes_exist(group_name, cursor):
                type_entries.append(("Phenotypes", "Phenotypes"))
            if genotypes_exist(group_name, cursor):
                type_entries.append(("Genotypes", "Genotypes"))
            type_entries.extend(build_types(species, group_name, cursor))

            if type_entries:
                types[species][group_name] = type_entries
            else:
                # Nothing to show for this group: drop it from the menu.
                groups[species] = tuple(
                    group for group in groups[species]
                    if group[0] != group_name)
    return types


def phenotypes_exist(group_name, cursor=None):
    """Return True if a PublishFreeze row named "<group_name>Publish" exists."""
    cursor = _resolve_cursor(cursor)
    cursor.execute(
        """select Name from PublishFreeze
           where PublishFreeze.Name = %s""",
        (group_name + "Publish",))
    return cursor.fetchone() is not None


def genotypes_exist(group_name, cursor=None):
    """Return True if a GenoFreeze row named "<group_name>Geno" exists."""
    cursor = _resolve_cursor(cursor)
    cursor.execute(
        """select Name from GenoFreeze
           where GenoFreeze.Name = %s""",
        (group_name + "Geno",))
    return cursor.fetchone() is not None


def build_types(species, group, cursor=None):
    """Fetches tissues

    Gets the tissues with data for this species/group
    (all types except phenotype/genotype are tissues)

    Only tissues for which build_datasets() returns at least one dataset
    are kept.  Returns a list of (tissue_name, tissue_name) tuples.
    """
    cursor = _resolve_cursor(cursor)
    cursor.execute(
        """select distinct Tissue.Name
           from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
           where Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                 InbredSet.Name = %s and
                 ProbeFreeze.TissueId = Tissue.Id and
                 ProbeFreeze.InbredSetId = InbredSet.Id and
                 ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
                 ProbeSetFreeze.public > 0 and
                 ProbeSetFreeze.confidentiality < 1
           order by Tissue.Name""",
        (species, group))

    # Fetch every row before looping: build_datasets() issues further
    # queries on the same cursor.
    tissue_rows = cursor.fetchall()

    results = []
    for row in tissue_rows:
        if not len(row):
            continue
        tissue_name = row[0]
        if len(build_datasets(species, group, tissue_name, cursor)) > 0:
            results.append((tissue_name, tissue_name))
    return results


def get_datasets(types, cursor=None):
    """Build datasets list

    `types` is the structure returned by get_types().  Returns a dict of
    the form {species: {group: {type_name: datasets}}}; a type is only
    included when build_datasets() returns at least one dataset for it.
    """
    cursor = _resolve_cursor(cursor)
    datasets = {}
    for species, group_dict in list(types.items()):
        datasets[species] = {}
        for group, type_list in list(group_dict.items()):
            datasets[species][group] = {}
            for type_name in type_list:
                these_datasets = build_datasets(
                    species, group, type_name[0], cursor)
                if len(these_datasets) > 0:
                    datasets[species][group][type_name[0]] = these_datasets
    return datasets


def build_datasets(species, group, type_name, cursor=None):
    """Gets dataset names from database

    `type_name` selects the query: "Phenotypes", "Genotypes", or anything
    else, which is treated as a tissue name for mRNA expression (ProbeSet)
    data.

    Returns a list of (id, value, text) entries.  Entries are tuples for
    phenotypes/genotypes and lists of strings for tissue datasets.
    """
    cursor = _resolve_cursor(cursor)
    datasets = []

    if type_name == "Phenotypes":
        cursor.execute(
            """select InfoFiles.GN_AccesionId, PublishFreeze.Name,
                      PublishFreeze.FullName
               from InfoFiles, PublishFreeze, InbredSet
               where InbredSet.Name = %s and
                     PublishFreeze.InbredSetId = InbredSet.Id and
                     InfoFiles.InfoPageName = PublishFreeze.Name
               order by PublishFreeze.CreateTime asc""",
            (group,))
        results = cursor.fetchall()
        if len(results) > 0:
            for result in results:
                dataset_id = str(result[0])
                dataset_value = str(result[1])
                if group == 'MDP':
                    dataset_text = "Mouse Phenome Database"
                else:
                    dataset_text = str(result[2])
                datasets.append((dataset_id, dataset_value, dataset_text))
        else:
            # No InfoFiles entry: fall back to a single default entry.
            datasets.append(
                ("None", "%sPublish" % group, "%s Phenotypes" % group))

    elif type_name == "Genotypes":
        cursor.execute(
            """select InfoFiles.GN_AccesionId
               from InfoFiles, GenoFreeze, InbredSet
               where InbredSet.Name = %s and
                     GenoFreeze.InbredSetId = InbredSet.Id and
                     InfoFiles.InfoPageName = GenoFreeze.ShortName and
                     GenoFreeze.public > 0 and
                     GenoFreeze.confidentiality < 1
               order by GenoFreeze.CreateTime desc""",
            (group,))
        result = cursor.fetchone()
        dataset_id = str(result[0]) if result is not None else "None"
        datasets.append(
            (dataset_id, "%sGeno" % group, "%s Genotypes" % group))

    else:  # for mRNA expression/ProbeSet
        cursor.execute(
            """select ProbeSetFreeze.Id, ProbeSetFreeze.Name,
                      ProbeSetFreeze.FullName
               from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species
               where Species.Name = %s and
                     Species.Id = InbredSet.SpeciesId and
                     InbredSet.Name = %s and
                     ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
                     Tissue.Name = %s and
                     ProbeFreeze.TissueId = Tissue.Id and
                     ProbeFreeze.InbredSetId = InbredSet.Id and
                     ProbeSetFreeze.confidentiality < 1 and
                     ProbeSetFreeze.public > 0
               order by ProbeSetFreeze.CreateTime desc""",
            (species, group, type_name))
        datasets = [[str(info) for info in dataset_info]
                    for dataset_info in cursor.fetchall()]

    return datasets


def main(cursor):
    """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""

    species = get_species(cursor)
    groups = get_groups(cursor, species)
    # get_types() prunes empty groups from `groups`, so it must run before
    # `groups` is written out below.
    types = get_types(groups, cursor)
    datasets = get_datasets(types, cursor)

    data = dict(species=species,
                groups=groups,
                types=types,
                datasets=datasets,
                )

    output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json"""

    with open(output_file, 'w') as fh:
        # NOTE(review): indent string kept as it appears in the reviewed
        # text; whitespace there may have been collapsed - confirm.
        json.dump(data, fh, indent=" ", sort_keys=True)


def _test_it(cursor=None):
    """Used for internal testing only"""
    build_types("Mouse", "BXD", cursor)
    build_datasets("Mouse", "BXD", "Hippocampus", cursor)


if __name__ == '__main__':
    with database_connection(get_setting("SQL_URI")) as conn:
        with conn.cursor() as cursor:
            main(cursor)