"""Script that generates the data for the main dropdown menus on the home page
Writes out data as /static/new/javascript/dataset_menu_structure.json
It needs to be run manually whenever the database has changed. Run it as:
./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
"""
# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams
# at rwilliams@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
import sys
# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
sys.path.insert(0, './')
# NEW: import app to avoid a circular dependency on utility.tools
from gn2.wqflask import app
from gn2.utility.tools import get_setting
import simplejson as json
import urllib.parse
from pprint import pformat as pf
from gn2.wqflask.database import database_connection
def get_species(cursor):
    """Fetch every species row, ordered by ``OrderId``.

    Args:
        cursor: database cursor the query is executed on.

    Returns:
        A list of the ``(Name, MenuName)`` rows returned by the cursor.
    """
    # An earlier variant of this query excluded 'macaque monkey'; all
    # species are now included.
    query = "select Name, MenuName from Species order by OrderId"
    cursor.execute(query)
    return [*cursor.fetchall()]
def get_groups(cursor, species):
    """Build the groups (inbred sets) available for each species.

    Args:
        cursor: database cursor the queries are executed on.
        species: iterable of ``(species_name, species_full_name)`` pairs;
            only the name is used.

    Returns:
        A dict mapping each species name to the list of
        ``(InbredSet.Name, InbredSet.FullName)`` rows for inbred sets of
        that species that are referenced by at least one PublishFreeze,
        GenoFreeze or ProbeFreeze row, ordered by full name.
    """
    # The species name is passed as a bound parameter rather than being
    # %-formatted into the SQL text, so names containing quotes cannot break
    # the statement. Assumes the driver uses the DB-API "format" paramstyle
    # (``%s`` placeholders), as MySQL drivers do.
    query = """select InbredSet.Name, InbredSet.FullName
               from InbredSet, Species, ProbeFreeze, GenoFreeze, PublishFreeze
               where Species.Name = %s
                     and InbredSet.SpeciesId = Species.Id and
                     (PublishFreeze.InbredSetId = InbredSet.Id
                      or GenoFreeze.InbredSetId = InbredSet.Id
                      or ProbeFreeze.InbredSetId = InbredSet.Id)
               group by InbredSet.Name
               order by InbredSet.FullName"""
    groups = {}
    for species_name, _species_full_name in species:
        cursor.execute(query, (species_name,))
        groups[species_name] = list(cursor.fetchall())
    return groups
def get_types(groups):
    """Build the list of dataset types available for every species/group.

    For each group the result holds ``("Phenotypes", "Phenotypes")`` and/or
    ``("Genotypes", "Genotypes")`` when the matching freeze exists, followed
    by the tissue types returned by ``build_types``.

    Args:
        groups: dict mapping species name to a sequence of
            ``(group_name, group_full_name)`` pairs.

    Returns:
        A dict of the form ``{species: {group_name: [(value, label), ...]}}``.

    Side effects:
        A group that ends up with no types at all is removed from
        ``groups[species]`` (the entry is rebound to a tuple without it).
    """
    types = {}
    for species, group_dict in list(groups.items()):
        types[species] = {}
        for group_name, _group_full_name in group_dict:
            # Each existence check is a database query; run it once per
            # group and reuse the answer below.
            has_phenotypes = phenotypes_exist(group_name)
            has_genotypes = genotypes_exist(group_name)

            if has_phenotypes:
                types[species][group_name] = [("Phenotypes", "Phenotypes")]
            if has_genotypes:
                if group_name in types[species]:
                    types[species][group_name] += [("Genotypes", "Genotypes")]
                else:
                    types[species][group_name] = [("Genotypes", "Genotypes")]

            types_list = build_types(species, group_name)
            if group_name in types[species]:
                if types_list:
                    types[species][group_name] += types_list
                elif not has_phenotypes and not has_genotypes:
                    types[species].pop(group_name, None)
                    groups[species] = tuple(
                        group for group in groups[species]
                        if group[0] != group_name)
            else:  # ZS: This whole else statement might be unnecessary, need to check
                if types_list:
                    types[species][group_name] = types_list
                else:
                    types[species].pop(group_name, None)
                    groups[species] = tuple(
                        group for group in groups[species]
                        if group[0] != group_name)
    return types
def phenotypes_exist(group_name):
    """Return True if a PublishFreeze row named ``<group_name>Publish`` exists.

    Uses the module-level ``Cursor``, which must be bound to an open database
    cursor before this function is called.

    Args:
        group_name: group (inbred set) name; ``"Publish"`` is appended to
            form the PublishFreeze name that is looked up.
    """
    # Bound parameter instead of %-formatting, so a quote in the name cannot
    # break the statement (assumes a ``%s``-placeholder DB-API driver).
    Cursor.execute(
        "select Name from PublishFreeze where PublishFreeze.Name = %s",
        (group_name + "Publish",))
    return Cursor.fetchone() is not None
def genotypes_exist(group_name):
    """Return True if a GenoFreeze row named ``<group_name>Geno`` exists.

    Uses the module-level ``Cursor``, which must be bound to an open database
    cursor before this function is called.

    Args:
        group_name: group (inbred set) name; ``"Geno"`` is appended to form
            the GenoFreeze name that is looked up.
    """
    # Bound parameter instead of %-formatting, so a quote in the name cannot
    # break the statement (assumes a ``%s``-placeholder DB-API driver).
    Cursor.execute(
        "select Name from GenoFreeze where GenoFreeze.Name = %s",
        (group_name + "Geno",))
    return Cursor.fetchone() is not None
def build_types(species, group):
    """Fetch the tissue types that have data for this species/group.

    All types except phenotype/genotype are tissues. Only tissues with at
    least one ProbeSetFreeze row that has ``public > 0`` and
    ``confidentiality < 1`` are selected, and a tissue is kept only when
    ``build_datasets`` returns at least one dataset for it.

    Uses the module-level ``Cursor``, which must be bound to an open database
    cursor before this function is called.

    Args:
        species: species name (matched against ``Species.Name``).
        group: group name (matched against ``InbredSet.Name``).

    Returns:
        A list of ``(tissue_name, tissue_name)`` pairs ordered by tissue name.
    """
    # Values are passed as bound parameters rather than %-formatted into the
    # SQL, so names containing quotes cannot break the statement (assumes a
    # ``%s``-placeholder DB-API driver).
    Cursor.execute(
        """select distinct Tissue.Name
           from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
           where Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                 InbredSet.Name = %s and
                 ProbeFreeze.TissueId = Tissue.Id and
                 ProbeFreeze.InbredSetId = InbredSet.Id and
                 ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
                 ProbeSetFreeze.public > 0 and
                 ProbeSetFreeze.confidentiality < 1
           order by Tissue.Name""",
        (species, group))

    results = []
    # fetchall() is fully consumed before the loop body runs, because
    # build_datasets issues its own queries on the same cursor.
    for result in Cursor.fetchall():
        if len(result):
            tissue_name = result[0]
            if build_datasets(species, group, tissue_name):
                results.append((tissue_name, tissue_name))
    return results
def get_datasets(types):
    """Collect the datasets for every species/group/type combination.

    Args:
        types: dict of the form ``{species: {group: [(type_value, ...), ...]}}``;
            only the first element of each type entry is used.

    Returns:
        A dict of the form ``{species: {group: {type_value: datasets}}}``.
        A type appears only when ``build_datasets`` returned at least one
        dataset for it; species and groups are always present, possibly empty.
    """
    datasets = {}
    for species, group_dict in types.items():
        per_species = datasets[species] = {}
        for group, type_list in group_dict.items():
            per_group = per_species[group] = {}
            for type_entry in type_list:
                type_value = type_entry[0]
                found = build_datasets(species, group, type_value)
                if found:
                    per_group[type_value] = found
    return datasets
def build_datasets(species, group, type_name):
    """Get the datasets for one species/group/type from the database.

    Uses the module-level ``Cursor``, which must be bound to an open database
    cursor before this function is called. All values are passed to the
    driver as bound parameters (assumes a ``%s``-placeholder DB-API driver).

    Args:
        species: species name; only used for tissue types.
        group: group name (matched against ``InbredSet.Name``).
        type_name: ``"Phenotypes"``, ``"Genotypes"``, or otherwise a tissue
            name (mRNA expression / ProbeSet data).

    Returns:
        A list of ``(id, name, display_text)`` entries, all strings:

        * Phenotypes: one tuple per matching PublishFreeze row, oldest first;
          if there is none, a single placeholder tuple with id ``"None"``.
        * Genotypes: always exactly one tuple; the id is ``"None"`` when no
          public, non-confidential GenoFreeze accession is found.
        * Tissue: one list per public, non-confidential ProbeSetFreeze row,
          newest first; may be empty.
    """
    if type_name == "Phenotypes":
        Cursor.execute(
            """select InfoFiles.GN_AccesionId, PublishFreeze.Name,
                      PublishFreeze.FullName
               from InfoFiles, PublishFreeze, InbredSet
               where InbredSet.Name = %s and
                     PublishFreeze.InbredSetId = InbredSet.Id and
                     InfoFiles.InfoPageName = PublishFreeze.Name
               order by PublishFreeze.CreateTime asc""",
            (group,))
        results = Cursor.fetchall()
        if not results:
            # No info-file entry: fall back to the conventional names.
            return [("None", "%sPublish" % group, "%s Phenotypes" % group)]

        datasets = []
        for result in results:
            dataset_id = str(result[0])
            dataset_value = str(result[1])
            if group == 'MDP':
                # The MDP group gets a fixed display name.
                dataset_text = "Mouse Phenome Database"
            else:
                dataset_text = str(result[2])
            datasets.append((dataset_id, dataset_value, dataset_text))
        return datasets

    if type_name == "Genotypes":
        Cursor.execute(
            """select InfoFiles.GN_AccesionId
               from InfoFiles, GenoFreeze, InbredSet
               where InbredSet.Name = %s and
                     GenoFreeze.InbredSetId = InbredSet.Id and
                     InfoFiles.InfoPageName = GenoFreeze.ShortName and
                     GenoFreeze.public > 0 and
                     GenoFreeze.confidentiality < 1
               order by GenoFreeze.CreateTime desc""",
            (group,))
        row = Cursor.fetchone()
        dataset_id = str(row[0]) if row is not None else "None"
        return [(dataset_id, "%sGeno" % group, "%s Genotypes" % group)]

    # Anything else is a tissue name: mRNA expression / ProbeSet datasets.
    Cursor.execute(
        """select ProbeSetFreeze.Id, ProbeSetFreeze.Name,
                  ProbeSetFreeze.FullName
           from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species
           where Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                 InbredSet.Name = %s and
                 ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
                 Tissue.Name = %s and
                 ProbeFreeze.TissueId = Tissue.Id and
                 ProbeFreeze.InbredSetId = InbredSet.Id and
                 ProbeSetFreeze.confidentiality < 1 and
                 ProbeSetFreeze.public > 0
           order by ProbeSetFreeze.CreateTime desc""",
        (species, group, type_name))
    return [[str(info) for info in dataset_info]
            for dataset_info in Cursor.fetchall()]
def main(cursor):
    """Generate the data for the home-page dropdown menus and write it as JSON.

    Builds the species, groups, types and datasets structures and dumps them
    (sorted by key) to ``./wqflask/static/new/javascript/dataset_menu_structure.json``,
    a path relative to the current working directory.

    Args:
        cursor: open database cursor used for every query of this run.
    """
    # phenotypes_exist, genotypes_exist, build_types and build_datasets read a
    # module-level ``Cursor`` instead of taking a cursor argument, and nothing
    # else in this module defines that name. Bind it here so those helpers
    # use this run's cursor rather than failing with NameError.
    global Cursor
    Cursor = cursor

    species = get_species(cursor)
    groups = get_groups(cursor, species)
    # get_types may prune entries from ``groups``, so it must run before the
    # data is assembled below.
    types = get_types(groups)
    datasets = get_datasets(types)

    data = dict(species=species,
                groups=groups,
                types=types,
                datasets=datasets,
                )

    output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json"""
    with open(output_file, 'w') as fh:
        json.dump(data, fh, indent=" ", sort_keys=True)
def _test_it():
    """Internal smoke test only: run the Mouse/BXD lookups and discard the results.

    Uncomment the prints to inspect what the queries return.
    """
    tissue_types = build_types("Mouse", "BXD")
    #print("build_types:", pf(tissue_types))
    hippocampus_datasets = build_datasets("Mouse", "BXD", "Hippocampus")
    #print("build_datasets:", pf(hippocampus_datasets))
if __name__ == '__main__':
    # Open one connection and one cursor for the whole run; both are released
    # when the ``with`` block exits.
    sql_uri = get_setting("SQL_URI")
    with database_connection(sql_uri) as conn, conn.cursor() as cursor:
        main(cursor)