diff options
author | Arun Isaac | 2023-12-29 18:55:37 +0000 |
---|---|---|
committer | Arun Isaac | 2023-12-29 19:01:46 +0000 |
commit | 204a308be0f741726b9a620d88fbc22b22124c81 (patch) | |
tree | b3cf66906674020b530c844c2bb4982c8a0e2d39 /gn2/maintenance | |
parent | 83062c75442160427b50420161bfcae2c5c34c84 (diff) | |
download | genenetwork2-204a308be0f741726b9a620d88fbc22b22124c81.tar.gz |
Namespace all modules under gn2.
We move all modules under a gn2 directory. This is important for
"correct" packaging and deployment as a Guix service.
Diffstat (limited to 'gn2/maintenance')
-rw-r--r-- | gn2/maintenance/README.md | 4 | ||||
-rw-r--r-- | gn2/maintenance/__init__.py | 0 | ||||
-rw-r--r-- | gn2/maintenance/convert_dryad_to_bimbam.py | 72 | ||||
-rw-r--r-- | gn2/maintenance/convert_geno_to_bimbam.py | 201 | ||||
-rw-r--r-- | gn2/maintenance/gen_ind_genofiles.py | 253 | ||||
-rw-r--r-- | gn2/maintenance/gen_select_dataset.py | 296 | ||||
-rw-r--r-- | gn2/maintenance/generate_kinship_from_bimbam.py | 66 | ||||
-rw-r--r-- | gn2/maintenance/generate_probesetfreeze_file.py | 122 | ||||
-rw-r--r-- | gn2/maintenance/geno_to_json.py | 196 | ||||
-rw-r--r-- | gn2/maintenance/get_group_samplelists.py | 47 | ||||
-rw-r--r-- | gn2/maintenance/print_benchmark.py | 45 | ||||
-rw-r--r-- | gn2/maintenance/quantile_normalize.py | 98 | ||||
-rw-r--r-- | gn2/maintenance/set_resource_defaults.py | 153 |
13 files changed, 1553 insertions, 0 deletions
diff --git a/gn2/maintenance/README.md b/gn2/maintenance/README.md new file mode 100644 index 00000000..873eaa32 --- /dev/null +++ b/gn2/maintenance/README.md @@ -0,0 +1,4 @@ +Maintenance files have been moved into a separate repository named +*gn_extra*. See https://github.com/genenetwork/gn_extra + + diff --git a/gn2/maintenance/__init__.py b/gn2/maintenance/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/gn2/maintenance/__init__.py diff --git a/gn2/maintenance/convert_dryad_to_bimbam.py b/gn2/maintenance/convert_dryad_to_bimbam.py new file mode 100644 index 00000000..18fbb8a1 --- /dev/null +++ b/gn2/maintenance/convert_dryad_to_bimbam.py @@ -0,0 +1,72 @@ +#!/usr/bin/python + +""" +Convert data dryad files to a BIMBAM _geno and _snps file + + +""" + +import sys +sys.path.append("..") + + +def read_dryad_file(filename): + exclude_count = 0 + marker_list = [] + sample_dict = {} + sample_list = [] + geno_rows = [] + with open(filename, 'r') as the_file: + for i, line in enumerate(the_file): + if i > 0: + if line.split(" ")[1] == "no": + sample_name = line.split(" ")[0] + sample_list.append(sample_name) + sample_dict[sample_name] = line.split(" ")[2:] + else: + exclude_count += 1 + else: + marker_list = line.split(" ")[2:] + + for i, marker in enumerate(marker_list): + this_row = [] + this_row.append(marker) + this_row.append("X") + this_row.append("Y") + for sample in sample_list: + this_row.append(sample_dict[sample][i]) + geno_rows.append(this_row) + + print(exclude_count) + + return geno_rows + + # for i, marker in enumerate(marker_list): + # this_row = [] + # this_row.append(marker) + # this_row.append("X") + # this_row.append("Y") + # with open(filename, 'r') as the_file: + # for j, line in enumerate(the_file): + # if j > 0: + # this_row.append(line.split(" ")[i+2]) + # print("row: " + str(i)) + # geno_rows.append(this_row) + # + # return geno_rows + + +def write_bimbam_files(geno_rows): + with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: + for row in geno_rows: + geno_fh.write(", ".join(row) + "\n") + + +def convert_dryad_to_bimbam(filename): + geno_file_rows = read_dryad_file(filename) + write_bimbam_files(geno_file_rows) + + +if __name__ == "__main__": + input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" + convert_dryad_to_bimbam(input_filename) diff --git a/gn2/maintenance/convert_geno_to_bimbam.py b/gn2/maintenance/convert_geno_to_bimbam.py new file mode 100644 index 00000000..078be529 --- /dev/null +++ b/gn2/maintenance/convert_geno_to_bimbam.py @@ -0,0 +1,201 @@ +#!/usr/bin/python + +""" +Convert .geno files to json + +This file goes through all of the genofiles in the genofile directory (.geno) +and converts them to json files that are used when running the marker regression +code + +""" + +import sys +sys.path.append("..") +import os +import glob +import traceback +import gzip + +import simplejson as json + +from pprint import pformat as pf + + +class EmptyConfigurations(Exception): + pass + + +class Marker: + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + + +class ConvertGenoFile: + + def __init__(self, input_file, output_files): + self.input_file = input_file + self.output_files = output_files + + self.mb_exists = False + self.cm_exists = False + self.markers = [] + + self.latest_row_pos = None + self.latest_col_pos = None + + self.latest_row_value = None + self.latest_col_value = None + + def convert(self): + self.haplotype_notation = { + '@mat': "1", + '@pat': "0", + '@het': "0.5", + '@unk': "NA" + } + + self.configurations = {} + self.input_fh = open(self.input_file) + + self.process_csv() + + def process_csv(self): + for row in self.process_rows(): + row_items = row.split("\t") + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper().strip() in self.configurations: + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) + else: + this_marker.genotypes.append("NA") + + self.markers.append(this_marker.__dict__) + + self.write_to_bimbam() + + def write_to_bimbam(self): + with open(self.output_files[0], "w") as geno_fh: + for marker in self.markers: + geno_fh.write(marker['name']) + geno_fh.write(", X, Y") + geno_fh.write(", " + ", ".join(marker['genotypes'])) + geno_fh.write("\n") + + with open(self.output_files[1], "w") as pheno_fh: + for sample in self.sample_list: + pheno_fh.write("1\n") + + with open(self.output_files[2], "w") as snp_fh: + for marker in self.markers: + if self.mb_exists: + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") + else: + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") + + def get_sample_list(self, row_contents): + self.sample_list = [] + if self.mb_exists: + if self.cm_exists: + self.sample_list = row_contents[4:] + else: + self.sample_list = row_contents[3:] + else: + if self.cm_exists: + self.sample_list = row_contents[3:] + else: + self.sample_list = row_contents[2:] + + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + self.get_sample_list(row.split()) + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key == "@filler": + raise EmptyConfigurations + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row + + @classmethod + def process_all(cls, old_directory, new_directory): + os.chdir(old_directory) + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue + group_name = ".".join(input_file.split('.')[:-1]) + if group_name == "HSNIH-Palmer": + continue + geno_output_file = os.path.join( + new_directory, group_name + "_geno.txt") + pheno_output_file = os.path.join( + new_directory, group_name + "_pheno.txt") + snp_output_file = os.path.join( + new_directory, group_name + "_snps.txt") + output_files = [geno_output_file, + pheno_output_file, snp_output_file] + print("%s -> %s" % ( + os.path.join(old_directory, input_file), geno_output_file)) + convertob = ConvertGenoFile(input_file, output_files) + try: + convertob.convert() + except EmptyConfigurations as why: + print(" No config info? Continuing...") + continue + except Exception as why: + print(" Exception:", why) + print(traceback.print_exc()) + print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, + convertob.latest_col_pos)) + print(" Column is:", convertob.latest_col_value) + print(" Row is:", convertob.latest_row_value) + break + + +if __name__ == "__main__": + Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" + New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" + #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" + #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" + #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") + # convertob.convert() + ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) + # ConvertGenoFiles(Geno_Directory) diff --git a/gn2/maintenance/gen_ind_genofiles.py b/gn2/maintenance/gen_ind_genofiles.py new file mode 100644 index 00000000..b755c648 --- /dev/null +++ b/gn2/maintenance/gen_ind_genofiles.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +"""A script that generates the genotype files for groups of individuals, using an existing strain genotype file as a basis + +Example commands: +python3 gen_ind_genofiles.py + /home/zas1024/gn2-zach/genotype_files/genotype/ + /home/zas1024/gn2-zach/new_geno/ + BXD-Micturition.geno + BXD.json +python3 gen_ind_genofiles.py + /home/zas1024/gn2-zach/genotype_files/genotype + /home/zas1024/gn2-zach/new_geno/ + BXD-Micturition.geno + BXD.2.geno BXD.4.geno BXD.5.geno + +""" + +import json +import os +import sys +from typing import List + +import MySQLdb + +def conn(): + return MySQLdb.Connect(db=os.environ.get("DB_NAME"), + user=os.environ.get("DB_USER"), + passwd=os.environ.get("DB_PASS"), + host=os.environ.get("DB_HOST")) + +def main(args): + + # Directory in which .geno files are located + geno_dir = args[1] + + # Directory in which to output new files + out_dir = args[2] + + # The individuals group that we want to generate a .geno file for + target_file = geno_dir + args[3] + + # The source group(s) we're generating the .geno files from + # This can be passed as either a specific .geno file (or set of files as multiple arguments), + # or as a JSON file containing a set of .geno files (and their corresponding file names and sample lists) + geno_json = {} + source_files = [] + if ".json" in args[4]: + geno_json = json.load(open(geno_dir + args[4], "r")) + par_f1s = { + "mat": geno_json['mat'], + "pat": geno_json['pat'], + "f1s": geno_json['f1s'] + } + + # List of file titles and locations from JSON + source_files = [{'title': genofile['title'], 'location': geno_dir + genofile['location']} for genofile in geno_json['genofile']] + else: + par_f1s = {} + # List of files directly taken from command line arguments, with titles just set to the filename + for group in args[4:]: + file_name = geno_dir + group + ".geno" if ".geno" not in group else geno_dir + group + source_files.append({'title': file_name[:-5], 'location': file_name}) + + if len(source_files) > 1: + # Generate a JSON file pointing to the new target genotype files, in situations where there are multiple source .geno files + target_json_loc = out_dir + ".".join(args[3].split(".")[:-1]) + ".json" + target_json = {'genofile': []} + + # Generate the output .geno files + for source_file in source_files: + filename, samples = generate_new_genofile(source_file['location'], target_file, par_f1s, out_dir) + + target_json['genofile'].append({ + 'location': filename.split("/")[-1], + 'title': source_file['title'], + 'sample_list': samples + }) + + json.dump(target_json, open(target_json_loc, "w")) + else: + filename, samples = generate_new_genofile(source_files[0]['location'], target_file, par_f1s, out_dir) + +def get_strain_for_sample(sample): + query = ( + "SELECT CaseAttributeXRefNew.Value " + "FROM CaseAttributeXRefNew, Strain " + "WHERE CaseAttributeXRefNew.CaseAttributeId=11 " + "AND CaseAttributeXRefNew.StrainId = Strain.Id " + "AND Strain.Name = %(name)s" ) + + with conn().cursor() as cursor: + cursor.execute(query, {"name": sample.strip()}) + strain = cursor.fetchone()[0] + return strain + +def generate_new_genofile(source_genofile, target_genofile, par_f1s, out_dir): + source_samples = group_samples(source_genofile) + source_genotypes = strain_genotypes(source_genofile) + target_samples = group_samples(target_genofile) + strain_pos_map = map_strain_pos_to_target_group(source_samples, target_samples, par_f1s) + + if len(source_genofile.split("/")[-1].split(".")) > 2: + # The number in the source genofile; for example 4 in BXD.4.geno + source_num = source_genofile.split("/")[-1].split(".")[-2] + target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + "." + source_num + ".geno" + else: + target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + ".geno" + + file_location = out_dir + target_filename + + with open(file_location, "w") as fh: + for metadata in ["name", "type", "mat", "pat", "het", "unk"]: + fh.write("@" + metadata + ":" + source_genotypes[metadata] + "\n") + + header_line = ["Chr", "Locus", "cM", "Mb"] + target_samples + fh.write("\t".join(header_line) + "\n") + + for marker in source_genotypes['markers']: + line_items = [ + marker['Chr'], + marker['Locus'], + marker['cM'], + marker['Mb'] + ] + + for pos in strain_pos_map: + if isinstance(pos, int): + line_items.append(marker['genotypes'][pos]) + else: + if pos in ["mat", "pat"]: + line_items.append(source_genotypes[pos]) + elif pos == "f1s": + line_items.append("H") + else: + line_items.append("U") + + fh.write("\t".join(line_items) + "\n") + + return file_location, target_samples + +def map_strain_pos_to_target_group(source_samples, target_samples, par_f1s): + """ + Retrieve corresponding strain position for each sample in the target group + + This is so the genotypes from the base genofile can be mapped to the samples in the target group + + For example: + Base strains: BXD1, BXD2, BXD3 + Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3 + Returns: [0, 0, 1, 2, 2, 2] + """ + pos_map = [] + for sample in target_samples: + sample_strain = get_strain_for_sample(sample) + if sample_strain in source_samples: + pos_map.append(source_samples.index(sample_strain)) + else: + val = "U" + for key in par_f1s.keys(): + if sample_strain in par_f1s[key]: + val = key + pos_map.append(val) + + return pos_map + +def group_samples(target_file: str) -> List: + """ + Get the group samples from its "dummy" .geno file (which still contains the sample list) + """ + + sample_list = [] + with open(target_file, "r") as target_geno: + for i, line in enumerate(target_geno): + # Skip header lines + if line[0] in ["#", "@"] or not len(line): + continue + + line_items = line.split() + + sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]] + break + + return sample_list + +def strain_genotypes(strain_genofile: str) -> List: + """ + Read genotypes from source strain .geno file + + :param strain_genofile: string of genofile filename + :return: a list of dictionaries representing each marker's genotypes + + Example output: [ + { + 'Chr': '1', + 'Locus': 'marker1', + 'Mb': '10.0', + 'cM': '8.0', + 'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...] + }, + ... + ] + """ + + geno_dict = {} + + geno_start_col = None + header_columns = [] + sample_list = [] + markers = [] + with open(strain_genofile, "r") as source_geno: + for i, line in enumerate(source_geno): + if line[0] == "@": + metadata_type = line[1:].split(":")[0] + if metadata_type in ['name', 'type', 'mat', 'pat', 'het', 'unk']: + geno_dict[metadata_type] = line.split(":")[1].strip() + + continue + + # Skip other header lines + if line[0] == "#" or not len(line): + continue + + line_items = line.split("\t") + if "Chr" in line_items: # Header row + # Get the first column index containing genotypes + header_columns = line_items + for j, item in enumerate(line_items): + if item not in ["Chr", "Locus", "Mb", "cM"]: + geno_start_col = j + break + + sample_list = line_items[geno_start_col:] + if not geno_start_col: + print("Check .geno file - expected columns not found") + sys.exit() + else: # Marker rows + this_marker = { + 'Chr': line_items[header_columns.index("Chr")], + 'Locus': line_items[header_columns.index("Locus")], + 'Mb': line_items[header_columns.index("Mb")], + 'cM': line_items[header_columns.index("cM")], + 'genotypes': [item.strip() for item in line_items][geno_start_col:] + } + + markers.append(this_marker) + + geno_dict['markers'] = markers + + return geno_dict + +if __name__ == "__main__": + main(sys.argv) + diff --git a/gn2/maintenance/gen_select_dataset.py b/gn2/maintenance/gen_select_dataset.py new file mode 100644 index 00000000..5f41da29 --- /dev/null +++ b/gn2/maintenance/gen_select_dataset.py @@ -0,0 +1,296 @@ +"""Script that generates the data for the main dropdown menus on the home page + +Writes out data as /static/new/javascript/dataset_menu_structure.json +It needs to be run manually when database has been changed. Run it as + + ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py + +""" + + +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. Williams +# at rwilliams@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) + +import sys + +# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead +sys.path.insert(0, './') +# NEW: import app to avoid a circular dependency on utility.tools +from gn2.wqflask import app + +from gn2.utility.tools import get_setting + +import simplejson as json +import urllib.parse + + +from pprint import pformat as pf + +from gn2.wqflask.database import database_connection + + +def get_species(cursor): + """Build species list""" + #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") + cursor.execute("select Name, MenuName from Species order by OrderId") + species = list(cursor.fetchall()) + return species + + +def get_groups(cursor, species): + """Build groups list""" + groups = {} + for species_name, _species_full_name in species: + cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet, + Species, + ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s' + and InbredSet.SpeciesId = Species.Id and + (PublishFreeze.InbredSetId = InbredSet.Id + or GenoFreeze.InbredSetId = InbredSet.Id + or ProbeFreeze.InbredSetId = InbredSet.Id) + group by InbredSet.Name + order by InbredSet.FullName""" % species_name) + results = cursor.fetchall() + groups[species_name] = list(results) + return groups + + +def get_types(groups): + """Build types list""" + types = {} + #print("Groups: ", pf(groups)) + for species, group_dict in list(groups.items()): + types[species] = {} + for group_name, _group_full_name in group_dict: + # make group an alias to shorten the code + #types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")] + if phenotypes_exist(group_name): + types[species][group_name] = [("Phenotypes", "Phenotypes")] + if genotypes_exist(group_name): + if group_name in types[species]: + types[species][group_name] += [("Genotypes", "Genotypes")] + else: + types[species][group_name] = [("Genotypes", "Genotypes")] + if group_name in types[species]: + types_list = build_types(species, group_name) + if len(types_list) > 0: + types[species][group_name] += types_list + else: + if not phenotypes_exist(group_name) and not genotypes_exist(group_name): + types[species].pop(group_name, None) + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) + else: # ZS: This whole else statement might be unnecessary, need to check + types_list = build_types(species, group_name) + if len(types_list) > 0: + types[species][group_name] = types_list + else: + types[species].pop(group_name, None) + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) + return types + + +def phenotypes_exist(group_name): + #print("group_name:", group_name) + Cursor.execute("""select Name from PublishFreeze + where PublishFreeze.Name = '%s'""" % (group_name + "Publish")) + + results = Cursor.fetchone() + #print("RESULTS:", results) + + if results != None: + return True + else: + return False + + +def genotypes_exist(group_name): + #print("group_name:", group_name) + Cursor.execute("""select Name from GenoFreeze + where GenoFreeze.Name = '%s'""" % (group_name + "Geno")) + + results = Cursor.fetchone() + #print("RESULTS:", results) + + if results != None: + return True + else: + return False + + +def build_types(species, group): + """Fetches tissues + + Gets the tissues with data for this species/group + (all types except phenotype/genotype are tissues) + + """ + + Cursor.execute("""select distinct Tissue.Name + from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species + where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and + InbredSet.Name = '%s' and + ProbeFreeze.TissueId = Tissue.Id and + ProbeFreeze.InbredSetId = InbredSet.Id and + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and + ProbeSetFreeze.public > 0 and + ProbeSetFreeze.confidentiality < 1 + order by Tissue.Name""" % (species, group)) + + results = [] + for result in Cursor.fetchall(): + if len(result): + these_datasets = build_datasets(species, group, result[0]) + if len(these_datasets) > 0: + results.append((result[0], result[0])) + + return results + + +def get_datasets(types): + """Build datasets list""" + datasets = {} + for species, group_dict in list(types.items()): + datasets[species] = {} + for group, type_list in list(group_dict.items()): + datasets[species][group] = {} + for type_name in type_list: + these_datasets = build_datasets(species, group, type_name[0]) + if len(these_datasets) > 0: + datasets[species][group][type_name[0]] = these_datasets + + return datasets + + +def build_datasets(species, group, type_name): + """Gets dataset names from database""" + dataset_text = dataset_value = None + datasets = [] + if type_name == "Phenotypes": + Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where + InbredSet.Name = '%s' and + PublishFreeze.InbredSetId = InbredSet.Id and + InfoFiles.InfoPageName = PublishFreeze.Name order by + PublishFreeze.CreateTime asc""" % group) + + results = Cursor.fetchall() + if len(results) > 0: + for result in results: + print(result) + dataset_id = str(result[0]) + dataset_value = str(result[1]) + if group == 'MDP': + dataset_text = "Mouse Phenome Database" + else: + #dataset_text = "%s Phenotypes" % group + dataset_text = str(result[2]) + datasets.append((dataset_id, dataset_value, dataset_text)) + else: + dataset_id = "None" + dataset_value = "%sPublish" % group + dataset_text = "%s Phenotypes" % group + datasets.append((dataset_id, dataset_value, dataset_text)) + + elif type_name == "Genotypes": + Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where + InbredSet.Name = '%s' and + GenoFreeze.InbredSetId = InbredSet.Id and + InfoFiles.InfoPageName = GenoFreeze.ShortName and + GenoFreeze.public > 0 and + GenoFreeze.confidentiality < 1 order by + GenoFreeze.CreateTime desc""" % group) + + results = Cursor.fetchone() + if results != None: + dataset_id = str(results[0]) + else: + dataset_id = "None" + dataset_value = "%sGeno" % group + dataset_text = "%s Genotypes" % group + datasets.append((dataset_id, dataset_value, dataset_text)) + + else: # for mRNA expression/ProbeSet + Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from + ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where + Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and + InbredSet.Name = '%s' and + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and + ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and + ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by + ProbeSetFreeze.CreateTime desc""" % (species, group, type_name)) + + dataset_results = Cursor.fetchall() + datasets = [] + for dataset_info in dataset_results: + this_dataset_info = [] + for info in dataset_info: + this_dataset_info.append(str(info)) + datasets.append(this_dataset_info) + + return datasets + + +def main(cursor): + """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" + + species = get_species(cursor) + groups = get_groups(cursor, species) + types = get_types(groups) + datasets = get_datasets(types) + + #species.append(('All Species', 'All Species')) + #groups['All Species'] = [('All Groups', 'All Groups')] + #types['All Species'] = {} + #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')] + #datasets['All Species'] = {} + #datasets['All Species']['All Groups'] = {} + #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')] + + data = dict(species=species, + groups=groups, + types=types, + datasets=datasets, + ) + + #print("data:", data) + + output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json""" + + with open(output_file, 'w') as fh: + json.dump(data, fh, indent=" ", sort_keys=True) + + #print("\nWrote file to:", output_file) + + +def _test_it(): + """Used for internal testing only""" + types = build_types("Mouse", "BXD") + #print("build_types:", pf(types)) + datasets = build_datasets("Mouse", "BXD", "Hippocampus") + #print("build_datasets:", pf(datasets)) + + +if __name__ == '__main__': + with database_connection(get_setting("SQL_URI")) as conn: + with conn.cursor() as cursor: + main(cursor) diff --git a/gn2/maintenance/generate_kinship_from_bimbam.py b/gn2/maintenance/generate_kinship_from_bimbam.py new file mode 100644 index 00000000..9f01d094 --- /dev/null +++ b/gn2/maintenance/generate_kinship_from_bimbam.py @@ -0,0 +1,66 @@ +#!/usr/bin/python + +""" +Generate relatedness matrix files for GEMMA from BIMBAM genotype/phenotype files + +This file goes through all of the BIMBAM files in the bimbam diretory +and uses GEMMA to generate their corresponding kinship/relatedness matrix file + +""" + +import sys +sys.path.append("..") +import os +import glob + + +class GenerateKinshipMatrices: + def __init__(self, group_name, geno_file, pheno_file): + self.group_name = group_name + self.geno_file = geno_file + self.pheno_file = pheno_file + + def generate_kinship(self): + gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \ + " -p " + self.pheno_file + \ + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name + print("command:", gemma_command) + os.system(gemma_command) + + @classmethod + def process_all(self, geno_dir, bimbam_dir): + os.chdir(geno_dir) + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue + group_name = ".".join(input_file.split('.')[:-1]) + if group_name == "HSNIH-Palmer": + continue + geno_input_file = os.path.join( + bimbam_dir, group_name + "_geno.txt") + pheno_input_file = os.path.join( + bimbam_dir, group_name + "_pheno.txt") + convertob = GenerateKinshipMatrices( + group_name, geno_input_file, pheno_input_file) + try: + convertob.generate_kinship() + except EmptyConfigurations as why: + print(" No config info? Continuing...") + continue + except Exception as why: + + print(" Exception:", why) + print(traceback.print_exc()) + print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, + convertob.latest_col_pos)) + print(" Column is:", convertob.latest_col_value) + print(" Row is:", convertob.latest_row_value) + break + + +if __name__ == "__main__": + Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/""" + Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/""" + GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) + + # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD diff --git a/gn2/maintenance/generate_probesetfreeze_file.py b/gn2/maintenance/generate_probesetfreeze_file.py new file mode 100644 index 00000000..00c2cddf --- /dev/null +++ b/gn2/maintenance/generate_probesetfreeze_file.py @@ -0,0 +1,122 @@ +#!/usr/bin/python + +import sys + +# sys.path.insert(0, "..") - why? + +import os +import collections +import csv + +from gn2.base import webqtlConfig + +from pprint import pformat as pf + +from gn2.utility.tools import get_setting +from gn2.wqflask.database import database_connection + + +def show_progress(process, counter): + if counter % 1000 == 0: + print("{}: {}".format(process, counter)) + + +def get_strains(cursor): + cursor.execute("""select Strain.Name + from Strain, StrainXRef, InbredSet + where Strain.Id = StrainXRef.StrainId and + StrainXRef.InbredSetId = InbredSet.Id + and InbredSet.Name=%s; + """, "BXD") + + strains = [strain[0] for strain in cursor.fetchall()] + print("strains:", pf(strains)) + for strain in strains: + print(" -", strain) + + return strains + + +def get_probeset_vals(cursor, dataset_name): + cursor.execute(""" select ProbeSet.Id, ProbeSet.Name + from ProbeSetXRef, + ProbeSetFreeze, + ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and + ProbeSetFreeze.Name = %s and + ProbeSetXRef.ProbeSetId = ProbeSet.Id; + """, dataset_name) + + probesets = cursor.fetchall() + + print("Fetched probesets") + + probeset_vals = collections.OrderedDict() + + for counter, probeset in enumerate(probesets): + cursor.execute(""" select Strain.Name, ProbeSetData.value + from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain + where ProbeSetData.Id = ProbeSetXRef.DataId + and ProbeSetData.StrainId = Strain.Id + and ProbeSetXRef.ProbeSetId = %s + and ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId + and ProbeSetFreeze.Name = %s; + """, (probeset[0], dataset_name)) + val_dic = collections.OrderedDict() + vals = cursor.fetchall() + for val in vals: + val_dic[val[0]] = val[1] + + probeset_vals[probeset[1]] = val_dic + show_progress("Querying DB", counter) + + return probeset_vals + + +def trim_strains(strains, probeset_vals): + trimmed_strains = [] + #print("probeset_vals is:", pf(probeset_vals)) + first_probeset = list(probeset_vals.values())[0] + print("\n**** first_probeset is:", pf(first_probeset)) + for strain in strains: + print("\n**** strain is:", pf(strain)) + if strain in first_probeset: + trimmed_strains.append(strain) + print("trimmed_strains:", pf(trimmed_strains)) + return trimmed_strains + + +def write_data_matrix_file(strains, probeset_vals, filename): + with open(filename, "wb") as fh: + csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) + #print("strains is:", pf(strains)) + csv_writer.writerow(['ID'] + strains) + for counter, probeset in enumerate(probeset_vals): + row_data = [probeset] + for strain in strains: + #print("probeset is: ", pf(probeset_vals[probeset])) + row_data.append(probeset_vals[probeset][strain]) + #print("row_data is: ", pf(row_data)) + csv_writer.writerow(row_data) + show_progress("Writing", counter) + + +def main(): + filename = os.path.expanduser( + "~/gene/wqflask/maintenance/" + "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + "(Oct08)_RankInv_Beta.txt") + dataset_name = "Eye_AXBXA_1008_RankInv" + + with database_connection(get_setting("SQL_URI")) as conn: + with conn.cursor() as cursor: + strains = get_strains(cursor) + print("Getting probset_vals") + probeset_vals = get_probeset_vals(cursor, dataset_name) + print("Finished getting probeset_vals") + trimmed_strains = trim_strains(strains, probeset_vals) + write_data_matrix_file(trimmed_strains, probeset_vals, filename) + + +if __name__ == '__main__': + main() diff --git a/gn2/maintenance/geno_to_json.py b/gn2/maintenance/geno_to_json.py new file mode 100644 index 00000000..7be2ed83 --- /dev/null +++ b/gn2/maintenance/geno_to_json.py @@ -0,0 +1,196 @@ +#!/usr/bin/python + +""" +Convert .geno files to json + +This file goes through all of the genofiles in the genofile directory (.geno) +and converts them to json files that are used when running the marker regression +code + +""" + +import sys +sys.path.append("..") +import os +import glob +import traceback +import gzip + +#import numpy as np +#from pyLMM import lmm + +import simplejson as json + +from pprint import pformat as pf + +#from gn2.utility.tools import flat_files + + +class EmptyConfigurations(Exception): + pass + + +class Marker: + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + + +class ConvertGenoFile: + + def __init__(self, input_file, output_file): + + self.input_file = input_file + self.output_file = output_file + + self.mb_exists = False + self.cm_exists = False + self.markers = [] + + self.latest_row_pos = None + self.latest_col_pos = None + + self.latest_row_value = None + self.latest_col_value = None + + def convert(self): + + self.haplotype_notation = { + '@mat': "1", + '@pat': "0", + '@het': "0.5", + '@unk': "NA" + } + + self.configurations = {} + #self.skipped_cols = 3 + + # if self.input_file.endswith(".geno.gz"): + # print("self.input_file: ", self.input_file) + # self.input_fh = gzip.open(self.input_file) + # else: + self.input_fh = open(self.input_file) + + with open(self.output_file, "w") as self.output_fh: + # if self.file_type == "geno": + self.process_csv() + # elif self.file_type == "snps": + # self.process_snps_file() + + def process_csv(self): + for row_count, row in enumerate(self.process_rows()): + row_items = row.split("\t") + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper() in self.configurations: + this_marker.genotypes.append( + self.configurations[genotype.upper()]) + else: + this_marker.genotypes.append("NA") + + #print("this_marker is:", pf(this_marker.__dict__)) + # if this_marker.chr == "14": + self.markers.append(this_marker.__dict__) + + with open(self.output_file, 'w') as fh: + json.dump(self.markers, fh, indent=" ", sort_keys=True) + + # print('configurations:', str(configurations)) + #self.latest_col_pos = item_count + self.skipped_cols + #self.latest_col_value = item + + # if item_count != 0: + # self.output_fh.write(" ") + # self.output_fh.write(self.configurations[item.upper()]) + + # self.output_fh.write("\n") + + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + # if self.input_file.endswith(".geno.gz"): + # print("row: ", row) + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row + + @classmethod + def process_all(cls, old_directory, new_directory): + os.chdir(old_directory) + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue + group_name = ".".join(input_file.split('.')[:-1]) + output_file = os.path.join(new_directory, group_name + ".json") + print("%s -> %s" % ( + os.path.join(old_directory, input_file), output_file)) + convertob = ConvertGenoFile(input_file, output_file) + try: + convertob.convert() + except EmptyConfigurations as why: + print(" No config info? Continuing...") + #excepted = True + continue + except Exception as why: + + print(" Exception:", why) + print(traceback.print_exc()) + print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, + convertob.latest_col_pos)) + print(" Column is:", convertob.latest_col_value) + print(" Row is:", convertob.latest_row_value) + break + + # def process_snps_file(cls, snps_file, new_directory): + # output_file = os.path.join(new_directory, "mouse_families.json") + # print("%s -> %s" % (snps_file, output_file)) + # convertob = ConvertGenoFile(input_file, output_file) + + +if __name__ == "__main__": + Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" + New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" + #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" + #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" + #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") + # convertob.convert() + ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) + # ConvertGenoFiles(Geno_Directory) + + #process_csv(Input_File, Output_File) diff --git a/gn2/maintenance/get_group_samplelists.py b/gn2/maintenance/get_group_samplelists.py new file mode 100644 index 00000000..6af637ea --- /dev/null +++ b/gn2/maintenance/get_group_samplelists.py @@ -0,0 +1,47 @@ +import os +import glob +import gzip + +from gn2.base import webqtlConfig + + +def get_samplelist(file_type, geno_file): + if file_type == "geno": + return get_samplelist_from_geno(geno_file) + elif file_type == "plink": + return get_samplelist_from_plink(geno_file) + + +def get_samplelist_from_geno(genofilename): + if os.path.isfile(genofilename + '.gz'): + genofilename += '.gz' + genofile = gzip.open(genofilename) + else: + genofile = open(genofilename) + + for line in genofile: + line = line.strip() + if not line: + continue + if line.startswith(("#", "@")): + continue + break + + headers = line.split("\t") + + if headers[3] == "Mb": + samplelist = headers[4:] + else: + samplelist = headers[3:] + return samplelist + + +def get_samplelist_from_plink(genofilename): + genofile = open(genofilename) + + samplelist = [] + for line in genofile: + line = line.split(" ") + samplelist.append(line[1]) + + return samplelist diff --git a/gn2/maintenance/print_benchmark.py b/gn2/maintenance/print_benchmark.py new file mode 100644 index 00000000..9d12da8a --- /dev/null +++ b/gn2/maintenance/print_benchmark.py @@ -0,0 +1,45 @@ +#!/usr/bin/python + +import time + +from pprint import pformat as pf + + +class TheCounter: + Counters = {} + + def __init__(self): + start_time = time.time() + for counter in range(170000): + self.print_it(counter) + self.time_took = time.time() - start_time + TheCounter.Counters[self.__class__.__name__] = self.time_took + + +class PrintAll(TheCounter): + def print_it(self, counter): + print(counter) + + +class PrintSome(TheCounter): + def print_it(self, counter): + if counter % 1000 == 0: + print(counter) + + +class PrintNone(TheCounter): + def print_it(self, counter): + pass + + +def new_main(): + print("Running new_main") + tests = [PrintAll, PrintSome, PrintNone] + for test in tests: + test() + + print(pf(TheCounter.Counters)) + + +if __name__ == '__main__': + new_main() diff --git a/gn2/maintenance/quantile_normalize.py b/gn2/maintenance/quantile_normalize.py new file mode 100644 index 00000000..5620b552 --- /dev/null +++ b/gn2/maintenance/quantile_normalize.py @@ -0,0 +1,98 @@ +import sys +sys.path.insert(0, './') +import urllib.parse + +import numpy as np +import pandas as pd + +from flask import Flask, g, request + +from gn2.wqflask import app +from gn2.wqflask.database import database_connection +from gn2.utility.tools import get_setting + + +def create_dataframe(input_file): + with open(input_file) as f: + ncols = len(f.readline().split("\t")) + + input_array = np.loadtxt(open( + input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) + return pd.DataFrame(input_array) + +# This function taken from https://github.com/ShawnLYU/Quantile_Normalize + + +def quantileNormalize(df_input): + df = df_input.copy() + # compute rank + dic = {} + for col in df: + dic.update({col: sorted(df[col])}) + sorted_df = pd.DataFrame(dic) + rank = sorted_df.mean(axis=1).tolist() + # sort + for col in df: + t = np.searchsorted(np.sort(df[col]), df[col]) + df[col] = [rank[i] for i in t] + return df + + +def set_data(cursor, dataset_name): + orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" + + sample_list = [] + with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh: + for i, (line1, line2) in enumerate(zip(orig_fh, quant_fh)): + trait_dict = {} + sample_list = [] + if i == 0: + sample_names = line1.split('\t')[1:] + else: + trait_name = line1.split('\t')[0] + for i, sample in enumerate(sample_names): + this_sample = { + "name": sample, + "value": line1.split('\t')[i + 1], + "qnorm": line2.split('\t')[i + 1] + } + sample_list.append(this_sample) + query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName + FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet + WHERE Species.Id = InbredSet.SpeciesId and + InbredSet.Id = ProbeFreeze.InbredSetId and + ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and + ProbeSetFreeze.Name = '%s' and + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and + ProbeSetXRef.ProbeSetId = ProbeSet.Id and + ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0]) + cursor.execute(query) + result_info = cursor.fetchone() + + yield { + "_index": "traits", + "_type": "trait", + "_source": { + "name": trait_name, + "species": result_info[0], + "group": result_info[1], + "dataset": dataset_name, + "dataset_fullname": result_info[2], + "samples": sample_list, + "transform_types": "qnorm" + } + } + + +if __name__ == '__main__': + with database_connection(get_setting("SQL_URI")) as conn: + with conn.cursor() as cursor: + success, _ = bulk(es, set_data(cursor, sys.argv[1])) + + response = es.search( + index="traits", doc_type="trait", body={ + "query": {"match": {"name": "ENSMUSG00000028982"}} + } + ) + + print(response) diff --git a/gn2/maintenance/set_resource_defaults.py b/gn2/maintenance/set_resource_defaults.py new file mode 100644 index 00000000..f9e5494a --- /dev/null +++ b/gn2/maintenance/set_resource_defaults.py @@ -0,0 +1,153 @@ +""" + +Script that sets default resource access masks for use with the DB proxy + +Defaults will be: +Owner - omni_gn +Mask - Public/non-confidential: { data: "view", + metadata: "view", + admin: "not-admin" } + Private/confidentia: { data: "no-access", + metadata: "no-access", + admin: "not-admin" } + +To run: +./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py + +""" + +import sys +import json + +# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead +sys.path.insert(0, './') + +# NEW: import app to avoid a circular dependency on utility.tools +from gn2.wqflask import app + +from gn2.utility import hmac +from gn2.utility.tools import get_setting +from gn2.utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info +Redis = get_redis_conn() + +import urllib.parse + +from gn2.wqflask.database import database_connection + + +def insert_probeset_resources(cursor, default_owner_id): + current_resources = Redis.hgetall("resources") + cursor.execute(""" SELECT + ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public + FROM + ProbeSetFreeze""") + + resource_results = cursor.fetchall() + for i, resource in enumerate(resource_results): + resource_ob = {} + resource_ob['name'] = resource[1] + resource_ob['owner_id'] = default_owner_id + resource_ob['data'] = {"dataset": str(resource[0])} + resource_ob['type'] = "dataset-probeset" + if resource[2] < 1 and resource[3] > 0: + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} + else: + resource_ob['default_mask'] = {"data": "no-access", + "metadata": "no-access", + "admin": "not-admin"} + resource_ob['group_masks'] = {} + + add_resource(resource_ob, update=False) + + +def insert_publish_resources(cursor, default_owner_id): + current_resources = Redis.hgetall("resources") + cursor.execute(""" SELECT + PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode + FROM + PublishXRef, PublishFreeze, InbredSet, Publication + WHERE + PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND + InbredSet.Id = PublishXRef.InbredSetId AND + Publication.Id = PublishXRef.PublicationId""") + + resource_results = cursor.fetchall() + for resource in resource_results: + if resource[2]: + resource_ob = {} + if resource[2]: + resource_ob['name'] = resource[2] + "_" + str(resource[0]) + else: + resource_ob['name'] = str(resource[0]) + resource_ob['owner_id'] = default_owner_id + resource_ob['data'] = {"dataset": str(resource[1]), + "trait": str(resource[0])} + resource_ob['type'] = "dataset-publish" + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} + + resource_ob['group_masks'] = {} + + add_resource(resource_ob, update=False) + else: + continue + + +def insert_geno_resources(cursor, default_owner_id): + current_resources = Redis.hgetall("resources") + cursor.execute(""" SELECT + GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality + FROM + GenoFreeze""") + + resource_results = cursor.fetchall() + for i, resource in enumerate(resource_results): + resource_ob = {} + resource_ob['name'] = resource[1] + if resource[1] == "HET3-ITPGeno": + resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" + else: + resource_ob['owner_id'] = default_owner_id + resource_ob['data'] = {"dataset": str(resource[0])} + resource_ob['type'] = "dataset-geno" + if resource[2] < 1: + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} + else: + resource_ob['default_mask'] = {"data": "no-access", + "metadata": "no-access", + "admin": "not-admin"} + resource_ob['group_masks'] = {} + + add_resource(resource_ob, update=False) + + +def insert_resources(default_owner_id): + current_resources = get_resources() + print("START") + insert_publish_resources(cursor, default_owner_id) + print("AFTER PUBLISH") + insert_geno_resources(cursor, default_owner_id) + print("AFTER GENO") + insert_probeset_resources(cursor, default_owner_id) + print("AFTER PROBESET") + + +def main(cursor): + """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" + + Redis.delete("resources") + + owner_id = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" + + insert_resources(owner_id) + + +if __name__ == '__main__': + with database_connection(get_setting("SQL_URI")) as conn: + with conn.cursor() as cursor: + main(cursor) |