diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/README.md | 4 | ||||
-rw-r--r-- | wqflask/maintenance/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/maintenance/convert_dryad_to_bimbam.py | 72 | ||||
-rw-r--r-- | wqflask/maintenance/convert_geno_to_bimbam.py | 201 | ||||
-rw-r--r-- | wqflask/maintenance/gen_ind_genofiles.py | 253 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 296 | ||||
-rw-r--r-- | wqflask/maintenance/generate_kinship_from_bimbam.py | 66 | ||||
-rw-r--r-- | wqflask/maintenance/generate_probesetfreeze_file.py | 122 | ||||
-rw-r--r-- | wqflask/maintenance/geno_to_json.py | 196 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 47 | ||||
-rw-r--r-- | wqflask/maintenance/print_benchmark.py | 45 | ||||
-rw-r--r-- | wqflask/maintenance/quantile_normalize.py | 98 | ||||
-rw-r--r-- | wqflask/maintenance/set_resource_defaults.py | 153 |
13 files changed, 0 insertions, 1553 deletions
diff --git a/wqflask/maintenance/README.md b/wqflask/maintenance/README.md deleted file mode 100644 index 873eaa32..00000000 --- a/wqflask/maintenance/README.md +++ /dev/null @@ -1,4 +0,0 @@ -Maintenance files have been moved into a separate repository named -*gn_extra*. See https://github.com/genenetwork/gn_extra - - diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py deleted file mode 100644 index e69de29b..00000000 --- a/wqflask/maintenance/__init__.py +++ /dev/null diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py deleted file mode 100644 index 18fbb8a1..00000000 --- a/wqflask/maintenance/convert_dryad_to_bimbam.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python - -""" -Convert data dryad files to a BIMBAM _geno and _snps file - - -""" - -import sys -sys.path.append("..") - - -def read_dryad_file(filename): - exclude_count = 0 - marker_list = [] - sample_dict = {} - sample_list = [] - geno_rows = [] - with open(filename, 'r') as the_file: - for i, line in enumerate(the_file): - if i > 0: - if line.split(" ")[1] == "no": - sample_name = line.split(" ")[0] - sample_list.append(sample_name) - sample_dict[sample_name] = line.split(" ")[2:] - else: - exclude_count += 1 - else: - marker_list = line.split(" ")[2:] - - for i, marker in enumerate(marker_list): - this_row = [] - this_row.append(marker) - this_row.append("X") - this_row.append("Y") - for sample in sample_list: - this_row.append(sample_dict[sample][i]) - geno_rows.append(this_row) - - print(exclude_count) - - return geno_rows - - # for i, marker in enumerate(marker_list): - # this_row = [] - # this_row.append(marker) - # this_row.append("X") - # this_row.append("Y") - # with open(filename, 'r') as the_file: - # for j, line in enumerate(the_file): - # if j > 0: - # this_row.append(line.split(" ")[i+2]) - # print("row: " + str(i)) - # geno_rows.append(this_row) - # - # return geno_rows - - -def write_bimbam_files(geno_rows): - with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: - for row in geno_rows: - geno_fh.write(", ".join(row) + "\n") - - -def convert_dryad_to_bimbam(filename): - geno_file_rows = read_dryad_file(filename) - write_bimbam_files(geno_file_rows) - - -if __name__ == "__main__": - input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" - convert_dryad_to_bimbam(input_filename) diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py deleted file mode 100644 index 078be529..00000000 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ /dev/null @@ -1,201 +0,0 @@ -#!/usr/bin/python - -""" -Convert .geno files to json - -This file goes through all of the genofiles in the genofile directory (.geno) -and converts them to json files that are used when running the marker regression -code - -""" - -import sys -sys.path.append("..") -import os -import glob -import traceback -import gzip - -import simplejson as json - -from pprint import pformat as pf - - -class EmptyConfigurations(Exception): - pass - - -class Marker: - def __init__(self): - self.name = None - self.chr = None - self.cM = None - self.Mb = None - self.genotypes = [] - - -class ConvertGenoFile: - - def __init__(self, input_file, output_files): - self.input_file = input_file - self.output_files = output_files - - self.mb_exists = False - self.cm_exists = False - self.markers = [] - - self.latest_row_pos = None - self.latest_col_pos = None - - self.latest_row_value = None - self.latest_col_value = None - - def convert(self): - self.haplotype_notation = { - '@mat': "1", - '@pat': "0", - '@het': "0.5", - '@unk': "NA" - } - - self.configurations = {} - self.input_fh = open(self.input_file) - - self.process_csv() - - def process_csv(self): - for row in self.process_rows(): - row_items = row.split("\t") - - this_marker = Marker() - this_marker.name = row_items[1] - this_marker.chr = row_items[0] - if self.cm_exists and self.mb_exists: - this_marker.cM = row_items[2] - this_marker.Mb = row_items[3] - genotypes = row_items[4:] - elif self.cm_exists: - this_marker.cM = row_items[2] - genotypes = row_items[3:] - elif self.mb_exists: - this_marker.Mb = row_items[2] - genotypes = row_items[3:] - else: - genotypes = row_items[2:] - for item_count, genotype in enumerate(genotypes): - if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append( - self.configurations[genotype.upper().strip()]) - else: - this_marker.genotypes.append("NA") - - self.markers.append(this_marker.__dict__) - - self.write_to_bimbam() - - def write_to_bimbam(self): - with open(self.output_files[0], "w") as geno_fh: - for marker in self.markers: - geno_fh.write(marker['name']) - geno_fh.write(", X, Y") - geno_fh.write(", " + ", ".join(marker['genotypes'])) - geno_fh.write("\n") - - with open(self.output_files[1], "w") as pheno_fh: - for sample in self.sample_list: - pheno_fh.write("1\n") - - with open(self.output_files[2], "w") as snp_fh: - for marker in self.markers: - if self.mb_exists: - snp_fh.write( - marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") - else: - snp_fh.write( - marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") - - def get_sample_list(self, row_contents): - self.sample_list = [] - if self.mb_exists: - if self.cm_exists: - self.sample_list = row_contents[4:] - else: - self.sample_list = row_contents[3:] - else: - if self.cm_exists: - self.sample_list = row_contents[3:] - else: - self.sample_list = row_contents[2:] - - def process_rows(self): - for self.latest_row_pos, row in enumerate(self.input_fh): - self.latest_row_value = row - # Take care of headers - if not row.strip(): - continue - if row.startswith('#'): - continue - if row.startswith('Chr'): - if 'Mb' in row.split(): - self.mb_exists = True - if 'cM' in row.split(): - self.cm_exists = True - self.get_sample_list(row.split()) - continue - if row.startswith('@'): - key, _separater, value = row.partition(':') - key = key.strip() - value = value.strip() - if key == "@filler": - raise EmptyConfigurations - if key in self.haplotype_notation: - self.configurations[value] = self.haplotype_notation[key] - continue - if not len(self.configurations): - raise EmptyConfigurations - yield row - - @classmethod - def process_all(cls, old_directory, new_directory): - os.chdir(old_directory) - for input_file in glob.glob("*"): - if not input_file.endswith(('geno', '.geno.gz')): - continue - group_name = ".".join(input_file.split('.')[:-1]) - if group_name == "HSNIH-Palmer": - continue - geno_output_file = os.path.join( - new_directory, group_name + "_geno.txt") - pheno_output_file = os.path.join( - new_directory, group_name + "_pheno.txt") - snp_output_file = os.path.join( - new_directory, group_name + "_snps.txt") - output_files = [geno_output_file, - pheno_output_file, snp_output_file] - print("%s -> %s" % ( - os.path.join(old_directory, input_file), geno_output_file)) - convertob = ConvertGenoFile(input_file, output_files) - try: - convertob.convert() - except EmptyConfigurations as why: - print(" No config info? Continuing...") - continue - except Exception as why: - print(" Exception:", why) - print(traceback.print_exc()) - print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) - print(" Column is:", convertob.latest_col_value) - print(" Row is:", convertob.latest_row_value) - break - - -if __name__ == "__main__": - Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" - New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" - #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" - #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" - #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - # convertob.convert() - ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - # ConvertGenoFiles(Geno_Directory) diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py deleted file mode 100644 index b755c648..00000000 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env python3 -"""A script that generates the genotype files for groups of individuals, using an existing strain genotype file as a basis - -Example commands: -python3 gen_ind_genofiles.py - /home/zas1024/gn2-zach/genotype_files/genotype/ - /home/zas1024/gn2-zach/new_geno/ - BXD-Micturition.geno - BXD.json -python3 gen_ind_genofiles.py - /home/zas1024/gn2-zach/genotype_files/genotype - /home/zas1024/gn2-zach/new_geno/ - BXD-Micturition.geno - BXD.2.geno BXD.4.geno BXD.5.geno - -""" - -import json -import os -import sys -from typing import List - -import MySQLdb - -def conn(): - return MySQLdb.Connect(db=os.environ.get("DB_NAME"), - user=os.environ.get("DB_USER"), - passwd=os.environ.get("DB_PASS"), - host=os.environ.get("DB_HOST")) - -def main(args): - - # Directory in which .geno files are located - geno_dir = args[1] - - # Directory in which to output new files - out_dir = args[2] - - # The individuals group that we want to generate a .geno file for - target_file = geno_dir + args[3] - - # The source group(s) we're generating the .geno files from - # This can be passed as either a specific .geno file (or set of files as multiple arguments), - # or as a JSON file containing a set of .geno files (and their corresponding file names and sample lists) - geno_json = {} - source_files = [] - if ".json" in args[4]: - geno_json = json.load(open(geno_dir + args[4], "r")) - par_f1s = { - "mat": geno_json['mat'], - "pat": geno_json['pat'], - "f1s": geno_json['f1s'] - } - - # List of file titles and locations from JSON - source_files = [{'title': genofile['title'], 'location': geno_dir + genofile['location']} for genofile in geno_json['genofile']] - else: - par_f1s = {} - # List of files directly taken from command line arguments, with titles just set to the filename - for group in args[4:]: - file_name = geno_dir + group + ".geno" if ".geno" not in group else geno_dir + group - source_files.append({'title': file_name[:-5], 'location': file_name}) - - if len(source_files) > 1: - # Generate a JSON file pointing to the new target genotype files, in situations where there are multiple source .geno files - target_json_loc = out_dir + ".".join(args[3].split(".")[:-1]) + ".json" - target_json = {'genofile': []} - - # Generate the output .geno files - for source_file in source_files: - filename, samples = generate_new_genofile(source_file['location'], target_file, par_f1s, out_dir) - - target_json['genofile'].append({ - 'location': filename.split("/")[-1], - 'title': source_file['title'], - 'sample_list': samples - }) - - json.dump(target_json, open(target_json_loc, "w")) - else: - filename, samples = generate_new_genofile(source_files[0]['location'], target_file, par_f1s, out_dir) - -def get_strain_for_sample(sample): - query = ( - "SELECT CaseAttributeXRefNew.Value " - "FROM CaseAttributeXRefNew, Strain " - "WHERE CaseAttributeXRefNew.CaseAttributeId=11 " - "AND CaseAttributeXRefNew.StrainId = Strain.Id " - "AND Strain.Name = %(name)s" ) - - with conn().cursor() as cursor: - cursor.execute(query, {"name": sample.strip()}) - strain = cursor.fetchone()[0] - return strain - -def generate_new_genofile(source_genofile, target_genofile, par_f1s, out_dir): - source_samples = group_samples(source_genofile) - source_genotypes = strain_genotypes(source_genofile) - target_samples = group_samples(target_genofile) - strain_pos_map = map_strain_pos_to_target_group(source_samples, target_samples, par_f1s) - - if len(source_genofile.split("/")[-1].split(".")) > 2: - # The number in the source genofile; for example 4 in BXD.4.geno - source_num = source_genofile.split("/")[-1].split(".")[-2] - target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + "." + source_num + ".geno" - else: - target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + ".geno" - - file_location = out_dir + target_filename - - with open(file_location, "w") as fh: - for metadata in ["name", "type", "mat", "pat", "het", "unk"]: - fh.write("@" + metadata + ":" + source_genotypes[metadata] + "\n") - - header_line = ["Chr", "Locus", "cM", "Mb"] + target_samples - fh.write("\t".join(header_line) + "\n") - - for marker in source_genotypes['markers']: - line_items = [ - marker['Chr'], - marker['Locus'], - marker['cM'], - marker['Mb'] - ] - - for pos in strain_pos_map: - if isinstance(pos, int): - line_items.append(marker['genotypes'][pos]) - else: - if pos in ["mat", "pat"]: - line_items.append(source_genotypes[pos]) - elif pos == "f1s": - line_items.append("H") - else: - line_items.append("U") - - fh.write("\t".join(line_items) + "\n") - - return file_location, target_samples - -def map_strain_pos_to_target_group(source_samples, target_samples, par_f1s): - """ - Retrieve corresponding strain position for each sample in the target group - - This is so the genotypes from the base genofile can be mapped to the samples in the target group - - For example: - Base strains: BXD1, BXD2, BXD3 - Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3 - Returns: [0, 0, 1, 2, 2, 2] - """ - pos_map = [] - for sample in target_samples: - sample_strain = get_strain_for_sample(sample) - if sample_strain in source_samples: - pos_map.append(source_samples.index(sample_strain)) - else: - val = "U" - for key in par_f1s.keys(): - if sample_strain in par_f1s[key]: - val = key - pos_map.append(val) - - return pos_map - -def group_samples(target_file: str) -> List: - """ - Get the group samples from its "dummy" .geno file (which still contains the sample list) - """ - - sample_list = [] - with open(target_file, "r") as target_geno: - for i, line in enumerate(target_geno): - # Skip header lines - if line[0] in ["#", "@"] or not len(line): - continue - - line_items = line.split() - - sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]] - break - - return sample_list - -def strain_genotypes(strain_genofile: str) -> List: - """ - Read genotypes from source strain .geno file - - :param strain_genofile: string of genofile filename - :return: a list of dictionaries representing each marker's genotypes - - Example output: [ - { - 'Chr': '1', - 'Locus': 'marker1', - 'Mb': '10.0', - 'cM': '8.0', - 'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...] - }, - ... - ] - """ - - geno_dict = {} - - geno_start_col = None - header_columns = [] - sample_list = [] - markers = [] - with open(strain_genofile, "r") as source_geno: - for i, line in enumerate(source_geno): - if line[0] == "@": - metadata_type = line[1:].split(":")[0] - if metadata_type in ['name', 'type', 'mat', 'pat', 'het', 'unk']: - geno_dict[metadata_type] = line.split(":")[1].strip() - - continue - - # Skip other header lines - if line[0] == "#" or not len(line): - continue - - line_items = line.split("\t") - if "Chr" in line_items: # Header row - # Get the first column index containing genotypes - header_columns = line_items - for j, item in enumerate(line_items): - if item not in ["Chr", "Locus", "Mb", "cM"]: - geno_start_col = j - break - - sample_list = line_items[geno_start_col:] - if not geno_start_col: - print("Check .geno file - expected columns not found") - sys.exit() - else: # Marker rows - this_marker = { - 'Chr': line_items[header_columns.index("Chr")], - 'Locus': line_items[header_columns.index("Locus")], - 'Mb': line_items[header_columns.index("Mb")], - 'cM': line_items[header_columns.index("cM")], - 'genotypes': [item.strip() for item in line_items][geno_start_col:] - } - - markers.append(this_marker) - - geno_dict['markers'] = markers - - return geno_dict - -if __name__ == "__main__": - main(sys.argv) - diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py deleted file mode 100644 index 01b2fc15..00000000 --- a/wqflask/maintenance/gen_select_dataset.py +++ /dev/null @@ -1,296 +0,0 @@ -"""Script that generates the data for the main dropdown menus on the home page - -Writes out data as /static/new/javascript/dataset_menu_structure.json -It needs to be run manually when database has been changed. Run it as - - ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py - -""" - - -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams -# at rwilliams@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) - -import sys - -# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead -sys.path.insert(0, './') -# NEW: import app to avoid a circular dependency on utility.tools -from wqflask import app - -from utility.tools import get_setting - -import simplejson as json -import urllib.parse - - -from pprint import pformat as pf - -from wqflask.database import database_connection - - -def get_species(cursor): - """Build species list""" - #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") - cursor.execute("select Name, MenuName from Species order by OrderId") - species = list(cursor.fetchall()) - return species - - -def get_groups(cursor, species): - """Build groups list""" - groups = {} - for species_name, _species_full_name in species: - cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet, - Species, - ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s' - and InbredSet.SpeciesId = Species.Id and - (PublishFreeze.InbredSetId = InbredSet.Id - or GenoFreeze.InbredSetId = InbredSet.Id - or ProbeFreeze.InbredSetId = InbredSet.Id) - group by InbredSet.Name - order by InbredSet.FullName""" % species_name) - results = cursor.fetchall() - groups[species_name] = list(results) - return groups - - -def get_types(groups): - """Build types list""" - types = {} - #print("Groups: ", pf(groups)) - for species, group_dict in list(groups.items()): - types[species] = {} - for group_name, _group_full_name in group_dict: - # make group an alias to shorten the code - #types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")] - if phenotypes_exist(group_name): - types[species][group_name] = [("Phenotypes", "Phenotypes")] - if genotypes_exist(group_name): - if group_name in types[species]: - types[species][group_name] += [("Genotypes", "Genotypes")] - else: - types[species][group_name] = [("Genotypes", "Genotypes")] - if group_name in types[species]: - types_list = build_types(species, group_name) - if len(types_list) > 0: - types[species][group_name] += types_list - else: - if not phenotypes_exist(group_name) and not genotypes_exist(group_name): - types[species].pop(group_name, None) - groups[species] = tuple( - group for group in groups[species] if group[0] != group_name) - else: # ZS: This whole else statement might be unnecessary, need to check - types_list = build_types(species, group_name) - if len(types_list) > 0: - types[species][group_name] = types_list - else: - types[species].pop(group_name, None) - groups[species] = tuple( - group for group in groups[species] if group[0] != group_name) - return types - - -def phenotypes_exist(group_name): - #print("group_name:", group_name) - Cursor.execute("""select Name from PublishFreeze - where PublishFreeze.Name = '%s'""" % (group_name + "Publish")) - - results = Cursor.fetchone() - #print("RESULTS:", results) - - if results != None: - return True - else: - return False - - -def genotypes_exist(group_name): - #print("group_name:", group_name) - Cursor.execute("""select Name from GenoFreeze - where GenoFreeze.Name = '%s'""" % (group_name + "Geno")) - - results = Cursor.fetchone() - #print("RESULTS:", results) - - if results != None: - return True - else: - return False - - -def build_types(species, group): - """Fetches tissues - - Gets the tissues with data for this species/group - (all types except phenotype/genotype are tissues) - - """ - - Cursor.execute("""select distinct Tissue.Name - from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species - where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and - InbredSet.Name = '%s' and - ProbeFreeze.TissueId = Tissue.Id and - ProbeFreeze.InbredSetId = InbredSet.Id and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.public > 0 and - ProbeSetFreeze.confidentiality < 1 - order by Tissue.Name""" % (species, group)) - - results = [] - for result in Cursor.fetchall(): - if len(result): - these_datasets = build_datasets(species, group, result[0]) - if len(these_datasets) > 0: - results.append((result[0], result[0])) - - return results - - -def get_datasets(types): - """Build datasets list""" - datasets = {} - for species, group_dict in list(types.items()): - datasets[species] = {} - for group, type_list in list(group_dict.items()): - datasets[species][group] = {} - for type_name in type_list: - these_datasets = build_datasets(species, group, type_name[0]) - if len(these_datasets) > 0: - datasets[species][group][type_name[0]] = these_datasets - - return datasets - - -def build_datasets(species, group, type_name): - """Gets dataset names from database""" - dataset_text = dataset_value = None - datasets = [] - if type_name == "Phenotypes": - Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where - InbredSet.Name = '%s' and - PublishFreeze.InbredSetId = InbredSet.Id and - InfoFiles.InfoPageName = PublishFreeze.Name order by - PublishFreeze.CreateTime asc""" % group) - - results = Cursor.fetchall() - if len(results) > 0: - for result in results: - print(result) - dataset_id = str(result[0]) - dataset_value = str(result[1]) - if group == 'MDP': - dataset_text = "Mouse Phenome Database" - else: - #dataset_text = "%s Phenotypes" % group - dataset_text = str(result[2]) - datasets.append((dataset_id, dataset_value, dataset_text)) - else: - dataset_id = "None" - dataset_value = "%sPublish" % group - dataset_text = "%s Phenotypes" % group - datasets.append((dataset_id, dataset_value, dataset_text)) - - elif type_name == "Genotypes": - Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where - InbredSet.Name = '%s' and - GenoFreeze.InbredSetId = InbredSet.Id and - InfoFiles.InfoPageName = GenoFreeze.ShortName and - GenoFreeze.public > 0 and - GenoFreeze.confidentiality < 1 order by - GenoFreeze.CreateTime desc""" % group) - - results = Cursor.fetchone() - if results != None: - dataset_id = str(results[0]) - else: - dataset_id = "None" - dataset_value = "%sGeno" % group - dataset_text = "%s Genotypes" % group - datasets.append((dataset_id, dataset_value, dataset_text)) - - else: # for mRNA expression/ProbeSet - Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from - ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where - Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and - InbredSet.Name = '%s' and - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and - ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and - ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by - ProbeSetFreeze.CreateTime desc""" % (species, group, type_name)) - - dataset_results = Cursor.fetchall() - datasets = [] - for dataset_info in dataset_results: - this_dataset_info = [] - for info in dataset_info: - this_dataset_info.append(str(info)) - datasets.append(this_dataset_info) - - return datasets - - -def main(cursor): - """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" - - species = get_species(cursor) - groups = get_groups(cursor, species) - types = get_types(groups) - datasets = get_datasets(types) - - #species.append(('All Species', 'All Species')) - #groups['All Species'] = [('All Groups', 'All Groups')] - #types['All Species'] = {} - #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')] - #datasets['All Species'] = {} - #datasets['All Species']['All Groups'] = {} - #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')] - - data = dict(species=species, - groups=groups, - types=types, - datasets=datasets, - ) - - #print("data:", data) - - output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json""" - - with open(output_file, 'w') as fh: - json.dump(data, fh, indent=" ", sort_keys=True) - - #print("\nWrote file to:", output_file) - - -def _test_it(): - """Used for internal testing only""" - types = build_types("Mouse", "BXD") - #print("build_types:", pf(types)) - datasets = build_datasets("Mouse", "BXD", "Hippocampus") - #print("build_datasets:", pf(datasets)) - - -if __name__ == '__main__': - with database_connection(get_setting("SQL_URI")) as conn: - with conn.cursor() as cursor: - main(cursor) diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py deleted file mode 100644 index 9f01d094..00000000 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/python - -""" -Generate relatedness matrix files for GEMMA from BIMBAM genotype/phenotype files - -This file goes through all of the BIMBAM files in the bimbam diretory -and uses GEMMA to generate their corresponding kinship/relatedness matrix file - -""" - -import sys -sys.path.append("..") -import os -import glob - - -class GenerateKinshipMatrices: - def __init__(self, group_name, geno_file, pheno_file): - self.group_name = group_name - self.geno_file = geno_file - self.pheno_file = pheno_file - - def generate_kinship(self): - gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \ - " -p " + self.pheno_file + \ - " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name - print("command:", gemma_command) - os.system(gemma_command) - - @classmethod - def process_all(self, geno_dir, bimbam_dir): - os.chdir(geno_dir) - for input_file in glob.glob("*"): - if not input_file.endswith(('geno', '.geno.gz')): - continue - group_name = ".".join(input_file.split('.')[:-1]) - if group_name == "HSNIH-Palmer": - continue - geno_input_file = os.path.join( - bimbam_dir, group_name + "_geno.txt") - pheno_input_file = os.path.join( - bimbam_dir, group_name + "_pheno.txt") - convertob = GenerateKinshipMatrices( - group_name, geno_input_file, pheno_input_file) - try: - convertob.generate_kinship() - except EmptyConfigurations as why: - print(" No config info? Continuing...") - continue - except Exception as why: - - print(" Exception:", why) - print(traceback.print_exc()) - print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) - print(" Column is:", convertob.latest_col_value) - print(" Row is:", convertob.latest_row_value) - break - - -if __name__ == "__main__": - Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/""" - Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/""" - GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) - - # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py deleted file mode 100644 index 2f917c71..00000000 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/python - -import sys - -# sys.path.insert(0, "..") - why? - -import os -import collections -import csv - -from base import webqtlConfig - -from pprint import pformat as pf - -from utility.tools import get_setting -from wqflask.database import database_connection - - -def show_progress(process, counter): - if counter % 1000 == 0: - print("{}: {}".format(process, counter)) - - -def get_strains(cursor): - cursor.execute("""select Strain.Name - from Strain, StrainXRef, InbredSet - where Strain.Id = StrainXRef.StrainId and - StrainXRef.InbredSetId = InbredSet.Id - and InbredSet.Name=%s; - """, "BXD") - - strains = [strain[0] for strain in cursor.fetchall()] - print("strains:", pf(strains)) - for strain in strains: - print(" -", strain) - - return strains - - -def get_probeset_vals(cursor, dataset_name): - cursor.execute(""" select ProbeSet.Id, ProbeSet.Name - from ProbeSetXRef, - ProbeSetFreeze, - ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and - ProbeSetFreeze.Name = %s and - ProbeSetXRef.ProbeSetId = ProbeSet.Id; - """, dataset_name) - - probesets = cursor.fetchall() - - print("Fetched probesets") - - probeset_vals = collections.OrderedDict() - - for counter, probeset in enumerate(probesets): - cursor.execute(""" select Strain.Name, ProbeSetData.value - from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain - where ProbeSetData.Id = ProbeSetXRef.DataId - and ProbeSetData.StrainId = Strain.Id - and ProbeSetXRef.ProbeSetId = %s - and ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId - and ProbeSetFreeze.Name = %s; - """, (probeset[0], dataset_name)) - val_dic = collections.OrderedDict() - vals = cursor.fetchall() - for val in vals: - val_dic[val[0]] = val[1] - - probeset_vals[probeset[1]] = val_dic - show_progress("Querying DB", counter) - - return probeset_vals - - -def trim_strains(strains, probeset_vals): - trimmed_strains = [] - #print("probeset_vals is:", pf(probeset_vals)) - first_probeset = list(probeset_vals.values())[0] - print("\n**** first_probeset is:", pf(first_probeset)) - for strain in strains: - print("\n**** strain is:", pf(strain)) - if strain in first_probeset: - trimmed_strains.append(strain) - print("trimmed_strains:", pf(trimmed_strains)) - return trimmed_strains - - -def write_data_matrix_file(strains, probeset_vals, filename): - with open(filename, "wb") as fh: - csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) - #print("strains is:", pf(strains)) - csv_writer.writerow(['ID'] + strains) - for counter, probeset in enumerate(probeset_vals): - row_data = [probeset] - for strain in strains: - #print("probeset is: ", pf(probeset_vals[probeset])) - row_data.append(probeset_vals[probeset][strain]) - #print("row_data is: ", pf(row_data)) - csv_writer.writerow(row_data) - show_progress("Writing", counter) - - -def main(): - filename = os.path.expanduser( - "~/gene/wqflask/maintenance/" - "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" - "(Oct08)_RankInv_Beta.txt") - dataset_name = "Eye_AXBXA_1008_RankInv" - - with database_connection(get_setting("SQL_URI")) as conn: - with conn.cursor() as cursor: - strains = get_strains(cursor) - print("Getting probset_vals") - probeset_vals = get_probeset_vals(cursor, dataset_name) - print("Finished getting probeset_vals") - trimmed_strains = trim_strains(strains, probeset_vals) - write_data_matrix_file(trimmed_strains, probeset_vals, filename) - - -if __name__ == '__main__': - main() diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py deleted file mode 100644 index 32e0e34b..00000000 --- a/wqflask/maintenance/geno_to_json.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/python - -""" -Convert .geno files to json - -This file goes through all of the genofiles in the genofile directory (.geno) -and converts them to json files that are used when running the marker regression -code - -""" - -import sys -sys.path.append("..") -import os -import glob -import traceback -import gzip - -#import numpy as np -#from pyLMM import lmm - -import simplejson as json - -from pprint import pformat as pf - -#from utility.tools import flat_files - - -class EmptyConfigurations(Exception): - pass - - -class Marker: - def __init__(self): - self.name = None - self.chr = None - self.cM = None - self.Mb = None - self.genotypes = [] - - -class ConvertGenoFile: - - def __init__(self, input_file, output_file): - - self.input_file = input_file - self.output_file = output_file - - self.mb_exists = False - self.cm_exists = False - self.markers = [] - - self.latest_row_pos = None - self.latest_col_pos = None - - self.latest_row_value = None - self.latest_col_value = None - - def convert(self): - - self.haplotype_notation = { - '@mat': "1", - '@pat': "0", - '@het': "0.5", - '@unk': "NA" - } - - self.configurations = {} - #self.skipped_cols = 3 - - # if self.input_file.endswith(".geno.gz"): - # print("self.input_file: ", self.input_file) - # self.input_fh = gzip.open(self.input_file) - # else: - self.input_fh = open(self.input_file) - - with open(self.output_file, "w") as self.output_fh: - # if self.file_type == "geno": - self.process_csv() - # elif self.file_type == "snps": - # self.process_snps_file() - - def process_csv(self): - for row_count, row in enumerate(self.process_rows()): - row_items = row.split("\t") - - this_marker = Marker() - this_marker.name = row_items[1] - this_marker.chr = row_items[0] - if self.cm_exists and self.mb_exists: - this_marker.cM = row_items[2] - this_marker.Mb = row_items[3] - genotypes = row_items[4:] - elif self.cm_exists: - this_marker.cM = row_items[2] - genotypes = row_items[3:] - elif self.mb_exists: - this_marker.Mb = row_items[2] - genotypes = row_items[3:] - else: - genotypes = row_items[2:] - for item_count, genotype in enumerate(genotypes): - if genotype.upper() in self.configurations: - this_marker.genotypes.append( - self.configurations[genotype.upper()]) - else: - this_marker.genotypes.append("NA") - - #print("this_marker is:", pf(this_marker.__dict__)) - # if this_marker.chr == "14": - self.markers.append(this_marker.__dict__) - - with open(self.output_file, 'w') as fh: - json.dump(self.markers, fh, indent=" ", sort_keys=True) - - # print('configurations:', str(configurations)) - #self.latest_col_pos = item_count + self.skipped_cols - #self.latest_col_value = item - - # if item_count != 0: - # self.output_fh.write(" ") - # self.output_fh.write(self.configurations[item.upper()]) - - # self.output_fh.write("\n") - - def process_rows(self): - for self.latest_row_pos, row in enumerate(self.input_fh): - # if self.input_file.endswith(".geno.gz"): - # print("row: ", row) - self.latest_row_value = row - # Take care of headers - if not row.strip(): - continue - if row.startswith('#'): - continue - if row.startswith('Chr'): - if 'Mb' in row.split(): - self.mb_exists = True - if 'cM' in row.split(): - self.cm_exists = True - continue - if row.startswith('@'): - key, _separater, value = row.partition(':') - key = key.strip() - value = value.strip() - if key in self.haplotype_notation: - self.configurations[value] = self.haplotype_notation[key] - continue - if not len(self.configurations): - raise EmptyConfigurations - yield row - - @classmethod - def process_all(cls, old_directory, new_directory): - os.chdir(old_directory) - for input_file in glob.glob("*"): - if not input_file.endswith(('geno', '.geno.gz')): - continue - group_name = ".".join(input_file.split('.')[:-1]) - output_file = os.path.join(new_directory, group_name + ".json") - print("%s -> %s" % ( - os.path.join(old_directory, input_file), output_file)) - convertob = ConvertGenoFile(input_file, output_file) - try: - convertob.convert() - except EmptyConfigurations as why: - print(" No config info? Continuing...") - #excepted = True - continue - except Exception as why: - - print(" Exception:", why) - print(traceback.print_exc()) - print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) - print(" Column is:", convertob.latest_col_value) - print(" Row is:", convertob.latest_row_value) - break - - # def process_snps_file(cls, snps_file, new_directory): - # output_file = os.path.join(new_directory, "mouse_families.json") - # print("%s -> %s" % (snps_file, output_file)) - # convertob = ConvertGenoFile(input_file, output_file) - - -if __name__ == "__main__": - Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" - New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" - #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" - #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" - #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - # convertob.convert() - ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - # ConvertGenoFiles(Geno_Directory) - - #process_csv(Input_File, Output_File) diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py deleted file mode 100644 index 0a450d3f..00000000 --- a/wqflask/maintenance/get_group_samplelists.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import glob -import gzip - -from base import webqtlConfig - - -def get_samplelist(file_type, geno_file): - if file_type == "geno": - return get_samplelist_from_geno(geno_file) - elif file_type == "plink": - return get_samplelist_from_plink(geno_file) - - -def get_samplelist_from_geno(genofilename): - if os.path.isfile(genofilename + '.gz'): - genofilename += '.gz' - genofile = gzip.open(genofilename) - else: - genofile = open(genofilename) - - for line in genofile: - line = line.strip() - if not line: - continue - if line.startswith(("#", "@")): - continue - break - - headers = line.split("\t") - - if headers[3] == "Mb": - samplelist = headers[4:] - else: - samplelist = headers[3:] - return samplelist - - -def get_samplelist_from_plink(genofilename): - genofile = open(genofilename) - - samplelist = [] - for line in genofile: - line = line.split(" ") - samplelist.append(line[1]) - - return samplelist diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py deleted file mode 100644 index 9d12da8a..00000000 --- a/wqflask/maintenance/print_benchmark.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python - -import time - -from pprint import pformat as pf - - -class TheCounter: - Counters = {} - - def __init__(self): - start_time = time.time() - for counter in range(170000): - self.print_it(counter) - self.time_took = time.time() - start_time - TheCounter.Counters[self.__class__.__name__] = self.time_took - - -class PrintAll(TheCounter): - def print_it(self, counter): - print(counter) - - -class PrintSome(TheCounter): - def print_it(self, counter): - if counter % 1000 == 0: - print(counter) - - -class PrintNone(TheCounter): - def print_it(self, counter): - pass - - -def new_main(): - print("Running new_main") - tests = [PrintAll, PrintSome, PrintNone] - for test in tests: - test() - - print(pf(TheCounter.Counters)) - - -if __name__ == '__main__': - new_main() diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py deleted file mode 100644 index 36049a82..00000000 --- a/wqflask/maintenance/quantile_normalize.py +++ /dev/null @@ -1,98 +0,0 @@ -import sys -sys.path.insert(0, './') -import urllib.parse - -import numpy as np -import pandas as pd - -from flask import Flask, g, request - -from wqflask import app -from wqflask.database import database_connection -from utility.tools import get_setting - - -def create_dataframe(input_file): - with open(input_file) as f: - ncols = len(f.readline().split("\t")) - - input_array = np.loadtxt(open( - input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) - return pd.DataFrame(input_array) - -# This function taken from https://github.com/ShawnLYU/Quantile_Normalize - - -def quantileNormalize(df_input): - df = df_input.copy() - # compute rank - dic = {} - for col in df: - dic.update({col: sorted(df[col])}) - sorted_df = pd.DataFrame(dic) - rank = sorted_df.mean(axis=1).tolist() - # sort - for col in df: - t = np.searchsorted(np.sort(df[col]), df[col]) - df[col] = [rank[i] for i in t] - return df - - -def set_data(cursor, dataset_name): - orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" - - sample_list = [] - with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh: - for i, (line1, line2) in enumerate(zip(orig_fh, quant_fh)): - trait_dict = {} - sample_list = [] - if i == 0: - sample_names = line1.split('\t')[1:] - else: - trait_name = line1.split('\t')[0] - for i, sample in enumerate(sample_names): - this_sample = { - "name": sample, - "value": line1.split('\t')[i + 1], - "qnorm": line2.split('\t')[i + 1] - } - sample_list.append(this_sample) - query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName - FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet - WHERE Species.Id = InbredSet.SpeciesId and - InbredSet.Id = ProbeFreeze.InbredSetId and - ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and - ProbeSetFreeze.Name = '%s' and - ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and - ProbeSetXRef.ProbeSetId = ProbeSet.Id and - ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0]) - cursor.execute(query) - result_info = cursor.fetchone() - - yield { - "_index": "traits", - "_type": "trait", - "_source": { - "name": trait_name, - "species": result_info[0], - "group": result_info[1], - "dataset": dataset_name, - "dataset_fullname": result_info[2], - "samples": sample_list, - "transform_types": "qnorm" - } - } - - -if __name__ == '__main__': - with database_connection(get_setting("SQL_URI")) as conn: - with conn.cursor() as cursor: - success, _ = bulk(es, set_data(cursor, sys.argv[1])) - - response = es.search( - index="traits", doc_type="trait", body={ - "query": {"match": {"name": "ENSMUSG00000028982"}} - } - ) - - print(response) diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py deleted file mode 100644 index cebe33c0..00000000 --- a/wqflask/maintenance/set_resource_defaults.py +++ /dev/null @@ -1,153 +0,0 @@ -""" - -Script that sets default resource access masks for use with the DB proxy - -Defaults will be: -Owner - omni_gn -Mask - Public/non-confidential: { data: "view", - metadata: "view", - admin: "not-admin" } - Private/confidentia: { data: "no-access", - metadata: "no-access", - admin: "not-admin" } - -To run: -./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py - -""" - -import sys -import json - -# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead -sys.path.insert(0, './') - -# NEW: import app to avoid a circular dependency on utility.tools -from wqflask import app - -from utility import hmac -from utility.tools import get_setting -from utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info -Redis = get_redis_conn() - -import urllib.parse - -from wqflask.database import database_connection - - -def insert_probeset_resources(cursor, default_owner_id): - current_resources = Redis.hgetall("resources") - cursor.execute(""" SELECT - ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public - FROM - ProbeSetFreeze""") - - resource_results = cursor.fetchall() - for i, resource in enumerate(resource_results): - resource_ob = {} - resource_ob['name'] = resource[1] - resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = {"dataset": str(resource[0])} - resource_ob['type'] = "dataset-probeset" - if resource[2] < 1 and resource[3] > 0: - resource_ob['default_mask'] = {"data": "view", - "metadata": "view", - "admin": "not-admin"} - else: - resource_ob['default_mask'] = {"data": "no-access", - "metadata": "no-access", - "admin": "not-admin"} - resource_ob['group_masks'] = {} - - add_resource(resource_ob, update=False) - - -def insert_publish_resources(cursor, default_owner_id): - current_resources = Redis.hgetall("resources") - cursor.execute(""" SELECT - PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode - FROM - PublishXRef, PublishFreeze, InbredSet, Publication - WHERE - PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND - InbredSet.Id = PublishXRef.InbredSetId AND - Publication.Id = PublishXRef.PublicationId""") - - resource_results = cursor.fetchall() - for resource in resource_results: - if resource[2]: - resource_ob = {} - if resource[2]: - resource_ob['name'] = resource[2] + "_" + str(resource[0]) - else: - resource_ob['name'] = str(resource[0]) - resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = {"dataset": str(resource[1]), - "trait": str(resource[0])} - resource_ob['type'] = "dataset-publish" - resource_ob['default_mask'] = {"data": "view", - "metadata": "view", - "admin": "not-admin"} - - resource_ob['group_masks'] = {} - - add_resource(resource_ob, update=False) - else: - continue - - -def insert_geno_resources(cursor, default_owner_id): - current_resources = Redis.hgetall("resources") - cursor.execute(""" SELECT - GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality - FROM - GenoFreeze""") - - resource_results = cursor.fetchall() - for i, resource in enumerate(resource_results): - resource_ob = {} - resource_ob['name'] = resource[1] - if resource[1] == "HET3-ITPGeno": - resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" - else: - resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = {"dataset": str(resource[0])} - resource_ob['type'] = "dataset-geno" - if resource[2] < 1: - resource_ob['default_mask'] = {"data": "view", - "metadata": "view", - "admin": "not-admin"} - else: - resource_ob['default_mask'] = {"data": "no-access", - "metadata": "no-access", - "admin": "not-admin"} - resource_ob['group_masks'] = {} - - add_resource(resource_ob, update=False) - - -def insert_resources(default_owner_id): - current_resources = get_resources() - print("START") - insert_publish_resources(cursor, default_owner_id) - print("AFTER PUBLISH") - insert_geno_resources(cursor, default_owner_id) - print("AFTER GENO") - insert_probeset_resources(cursor, default_owner_id) - print("AFTER PROBESET") - - -def main(cursor): - """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" - - Redis.delete("resources") - - owner_id = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" - - insert_resources(owner_id) - - -if __name__ == '__main__': - with database_connection(get_setting("SQL_URI")) as conn: - with conn.cursor() as cursor: - main(cursor) |