merge changes

author: Alexander_Kabui 2024-01-02 13:21:07 +0300
committer: Alexander_Kabui 2024-01-02 13:21:07 +0300
commit: 70c4201b332e0e2c0d958428086512f291469b87 (patch)
tree: aea4fac8782c110fc233c589c3f0f7bd34bada6c /gn2/maintenance/gen_select_dataset.py
parent: 5092eb42f062b1695c4e39619f0bd74a876cfac2 (diff)
parent: 965ce5114d585624d5edb082c710b83d83a3be40 (diff)
download: genenetwork2-70c4201b332e0e2c0d958428086512f291469b87.tar.gz
1 files changed, 296 insertions, 0 deletions
diff --git a/gn2/maintenance/gen_select_dataset.py b/gn2/maintenance/gen_select_dataset.py
new file mode 100644
index 00000000..5f41da29
--- /dev/null
+++ b/gn2/maintenance/gen_select_dataset.py
@@ -0,0 +1,296 @@
+"""Script that generates the data for the main dropdown menus on the home page
+
+Writes out data as ./wqflask/static/new/javascript/dataset_menu_structure.json
+(relative to the current working directory).
+It needs to be run manually whenever the database has been changed. Run it as
+
+  ./bin/genenetwork2 ~/my_settings.py -c ./gn2/maintenance/gen_select_dataset.py
+
+"""
+
+
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams
+# at rwilliams@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+
+import sys
+
+# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
+sys.path.insert(0, './')
+# NEW: import app to avoid a circular dependency on utility.tools
+from gn2.wqflask import app
+
+from gn2.utility.tools import get_setting
+
+import simplejson as json
+import urllib.parse
+
+
+from pprint import pformat as pf
+
+from gn2.wqflask.database import database_connection
+
+
+def get_species(cursor):
+    """Return all (Name, MenuName) rows of the Species table as a list.
+
+    The rows come back in the order given by the table's OrderId column.
+    ``cursor`` is the database cursor the query is executed on.
+    """
+    cursor.execute("select Name, MenuName from Species order by OrderId")
+    return list(cursor.fetchall())
+
+
+def get_groups(cursor, species):
+    """Map each species name to its list of (Name, FullName) InbredSet rows.
+
+    ``species`` is an iterable of two-item rows such as ``get_species``
+    returns; only the first item (the species name) is used.  One query is
+    executed on ``cursor`` per species, with the species name interpolated
+    into the SQL text as-is.
+    """
+    group_query = """select InbredSet.Name, InbredSet.FullName from InbredSet,
+                       Species,
+                       ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s'
+                       and InbredSet.SpeciesId = Species.Id and
+                       (PublishFreeze.InbredSetId = InbredSet.Id
+                        or GenoFreeze.InbredSetId = InbredSet.Id
+                        or ProbeFreeze.InbredSetId = InbredSet.Id)
+                        group by InbredSet.Name
+                        order by InbredSet.FullName"""
+    groups = {}
+    for species_name, _menu_name in species:
+        cursor.execute(group_query % species_name)
+        groups[species_name] = list(cursor.fetchall())
+    return groups
+
+
+def get_types(groups):
+    """Build the per-group list of dataset types for every species.
+
+    ``groups`` maps a species name to a sequence of (group name, group full
+    name) pairs.  For each group the type list is, in this order:
+    ("Phenotypes", "Phenotypes") when ``phenotypes_exist`` reports a match,
+    ("Genotypes", "Genotypes") when ``genotypes_exist`` reports a match,
+    followed by whatever ``build_types`` returns for the species/group.
+
+    Side effect: a group that ends up with no types at all gets no entry in
+    the result and is filtered out of ``groups[species]``, which is rebound
+    to a tuple when that happens.
+
+    Returns a dict of the form ``{species: {group_name: [(type, type), ...]}}``.
+    """
+    types = {}
+    # Iterate over a snapshot of the items: groups[species] may be rebound
+    # below, while group_list keeps pointing at the original sequence.
+    for species, group_list in list(groups.items()):
+        species_types = types[species] = {}
+        for group_name, _group_full_name in group_list:
+            type_list = []
+            if phenotypes_exist(group_name):
+                type_list.append(("Phenotypes", "Phenotypes"))
+            if genotypes_exist(group_name):
+                type_list.append(("Genotypes", "Genotypes"))
+            type_list += build_types(species, group_name)
+
+            if type_list:
+                species_types[group_name] = type_list
+            else:
+                # Nothing to offer for this group: drop it from the group list.
+                groups[species] = tuple(
+                    group for group in groups[species] if group[0] != group_name)
+    return types
+
+
+def phenotypes_exist(group_name):
+    """Return True when PublishFreeze has a row named ``<group_name>Publish``.
+
+    The query runs on the module-level ``Cursor``, which has to be bound to an
+    open database cursor before this function is called.  ``group_name`` is
+    interpolated into the SQL text as-is.
+    """
+    Cursor.execute("""select Name from PublishFreeze
+                      where PublishFreeze.Name = '%s'""" % (group_name + "Publish"))
+
+    # fetchone() yields None when the query matched no row.
+    return Cursor.fetchone() is not None
+
+
+def genotypes_exist(group_name):
+    """Return True when GenoFreeze has a row named ``<group_name>Geno``.
+
+    The query runs on the module-level ``Cursor``, which has to be bound to an
+    open database cursor before this function is called.  ``group_name`` is
+    interpolated into the SQL text as-is.
+    """
+    Cursor.execute("""select Name from GenoFreeze
+                      where GenoFreeze.Name = '%s'""" % (group_name + "Geno"))
+
+    # fetchone() yields None when the query matched no row.
+    return Cursor.fetchone() is not None
+
+
+def build_types(species, group):
+    """List the tissue types that have datasets for a species/group pair.
+
+    Selects the distinct tissue names linked to public, non-confidential
+    ProbeSetFreeze rows of the group, ordered by name, and keeps only those
+    for which ``build_datasets`` returns at least one dataset.
+
+    Returns a list of (tissue name, tissue name) tuples.  The query runs on
+    the module-level ``Cursor``.
+    """
+
+    Cursor.execute("""select distinct Tissue.Name
+                       from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
+                       where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
+                       InbredSet.Name = '%s' and
+                       ProbeFreeze.TissueId = Tissue.Id and
+                       ProbeFreeze.InbredSetId = InbredSet.Id and
+                       ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
+                       ProbeSetFreeze.public > 0 and
+                       ProbeSetFreeze.confidentiality < 1
+                       order by Tissue.Name""" % (species, group))
+
+    # All rows are read here, before build_datasets issues its own queries
+    # on the same Cursor.
+    tissue_names = [row[0] for row in Cursor.fetchall() if len(row)]
+
+    return [(tissue, tissue)
+            for tissue in tissue_names
+            if len(build_datasets(species, group, tissue)) > 0]
+
+
+def get_datasets(types):
+    """Collect the datasets for every species/group/type combination.
+
+    ``types`` has the shape produced by ``get_types``:
+    ``{species: {group: [(type, type), ...]}}``.  Every species and group
+    gets an entry in the result; a type is only listed under its group when
+    ``build_datasets`` returns a non-empty list for it.
+
+    Returns ``{species: {group: {type_name: datasets}}}``.
+    """
+    datasets = {}
+    for species, group_dict in types.items():
+        per_species = datasets[species] = {}
+        for group, type_list in group_dict.items():
+            per_group = per_species[group] = {}
+            for type_entry in type_list:
+                type_key = type_entry[0]
+                found = build_datasets(species, group, type_key)
+                if found:
+                    per_group[type_key] = found
+
+    return datasets
+
+
+def build_datasets(species, group, type_name):
+    """Look up the datasets of one type for a species/group pair.
+
+    ``type_name`` selects the query:
+
+    * "Phenotypes" -- one (accession id, name, display text) tuple per
+      matching PublishFreeze/InfoFiles row, oldest CreateTime first.  The
+      display text is "Mouse Phenome Database" for group 'MDP' and the row's
+      FullName otherwise.  When no row matches, the single placeholder
+      ("None", "<group>Publish", "<group> Phenotypes") is returned.
+    * "Genotypes" -- exactly one tuple
+      (accession id or "None", "<group>Geno", "<group> Genotypes").
+    * anything else -- treated as a tissue name; one [Id, Name, FullName]
+      list of strings per public, non-confidential ProbeSetFreeze row,
+      newest CreateTime first.
+
+    All values are converted with ``str``.  The queries run on the
+    module-level ``Cursor``, which has to be bound to an open database cursor
+    before this function is called; the arguments are interpolated into the
+    SQL text as-is.
+    """
+    if type_name == "Phenotypes":
+        Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where
+                    InbredSet.Name = '%s' and
+                    PublishFreeze.InbredSetId = InbredSet.Id and
+                    InfoFiles.InfoPageName = PublishFreeze.Name order by
+                    PublishFreeze.CreateTime asc""" % group)
+
+        results = Cursor.fetchall()
+        if not results:
+            # No InfoFiles entry: fall back to the conventional names.
+            return [("None", "%sPublish" % group, "%s Phenotypes" % group)]
+
+        datasets = []
+        for result in results:
+            if group == 'MDP':
+                dataset_text = "Mouse Phenome Database"
+            else:
+                dataset_text = str(result[2])
+            datasets.append((str(result[0]), str(result[1]), dataset_text))
+        return datasets
+
+    if type_name == "Genotypes":
+        Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
+                    InbredSet.Name = '%s' and
+                    GenoFreeze.InbredSetId = InbredSet.Id and
+                    InfoFiles.InfoPageName = GenoFreeze.ShortName and
+                    GenoFreeze.public > 0 and
+                    GenoFreeze.confidentiality < 1 order by
+                    GenoFreeze.CreateTime desc""" % group)
+
+        # Only the first row (most recent CreateTime) is used.
+        result = Cursor.fetchone()
+        dataset_id = str(result[0]) if result is not None else "None"
+        return [(dataset_id, "%sGeno" % group, "%s Genotypes" % group)]
+
+    # for mRNA expression/ProbeSet
+    Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
+                    ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
+                    Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
+                    InbredSet.Name = '%s' and
+                    ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and
+                    ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and
+                    ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by
+                    ProbeSetFreeze.CreateTime desc""" % (species, group, type_name))
+
+    return [[str(info) for info in dataset_info]
+            for dataset_info in Cursor.fetchall()]
+
+
+def main(cursor):
+    """Generate the home-page dropdown data and write it out as JSON.
+
+    ``cursor`` is an open database cursor.  It is used directly for the
+    species and group queries and is also bound to the module-level
+    ``Cursor`` that phenotypes_exist, genotypes_exist, build_types and
+    build_datasets read.
+
+    The result -- a dict with the keys "species", "groups", "types" and
+    "datasets" -- is written to
+    ./wqflask/static/new/javascript/dataset_menu_structure.json, relative to
+    the current working directory.
+    """
+    # The helper functions query through a global ``Cursor`` that no other
+    # code in this module assigns; without this binding get_types() fails
+    # with a NameError on its first helper call.
+    global Cursor
+    Cursor = cursor
+
+    species = get_species(cursor)
+    groups = get_groups(cursor, species)
+    types = get_types(groups)
+    datasets = get_datasets(types)
+
+    # NOTE: an 'All Species' / 'All Groups' / 'All Phenotypes' pseudo-entry
+    # used to be appended to each structure here; it is currently disabled.
+
+    data = dict(species=species,
+                groups=groups,
+                types=types,
+                datasets=datasets,
+                )
+
+    output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json"""
+
+    with open(output_file, 'w') as fh:
+        json.dump(data, fh, indent="   ", sort_keys=True)
+
+
+def _test_it():
+    """Used for internal testing only.
+
+    Runs build_types and build_datasets for the Mouse/BXD group (tissue
+    "Hippocampus"); the results are discarded and nothing is returned.
+    """
+    build_types("Mouse", "BXD")
+    build_datasets("Mouse", "BXD", "Hippocampus")
+
+
+if __name__ == '__main__':
+    # Connect using the configured SQL_URI and hand a single cursor to main().
+    with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
+        main(cursor)
author	Alexander_Kabui	2024-01-02 13:21:07 +0300
committer	Alexander_Kabui	2024-01-02 13:21:07 +0300
commit	70c4201b332e0e2c0d958428086512f291469b87 (patch)
tree	aea4fac8782c110fc233c589c3f0f7bd34bada6c /gn2/maintenance/gen_select_dataset.py
parent	5092eb42f062b1695c4e39619f0bd74a876cfac2 (diff)
parent	965ce5114d585624d5edb082c710b83d83a3be40 (diff)
download	genenetwork2-70c4201b332e0e2c0d958428086512f291469b87.tar.gz