about summary refs log tree commit diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- wqflask/maintenance/README.md 4
-rw-r--r-- wqflask/maintenance/__init__.py 0
-rw-r--r-- wqflask/maintenance/convert_dryad_to_bimbam.py 72
-rw-r--r-- wqflask/maintenance/convert_geno_to_bimbam.py 201
-rw-r--r-- wqflask/maintenance/gen_ind_genofiles.py 253
-rw-r--r-- wqflask/maintenance/gen_select_dataset.py 296
-rw-r--r-- wqflask/maintenance/generate_kinship_from_bimbam.py 66
-rw-r--r-- wqflask/maintenance/generate_probesetfreeze_file.py 122
-rw-r--r-- wqflask/maintenance/geno_to_json.py 196
-rw-r--r-- wqflask/maintenance/get_group_samplelists.py 47
-rw-r--r-- wqflask/maintenance/print_benchmark.py 45
-rw-r--r-- wqflask/maintenance/quantile_normalize.py 98
-rw-r--r-- wqflask/maintenance/set_resource_defaults.py 153
13 files changed, 0 insertions, 1553 deletions
diff --git a/wqflask/maintenance/README.md b/wqflask/maintenance/README.md
deleted file mode 100644
index 873eaa32..00000000
--- a/wqflask/maintenance/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-Maintenance files have been moved into a separate repository named
-*gn_extra*. See https://github.com/genenetwork/gn_extra
-
-
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/wqflask/maintenance/__init__.py
+++ /dev/null
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
deleted file mode 100644
index 18fbb8a1..00000000
--- a/wqflask/maintenance/convert_dryad_to_bimbam.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/python
-
-"""
-Convert data dryad files to a BIMBAM _geno and _snps file
-
-
-"""
-
-import sys
-sys.path.append("..")
-
-
-def read_dryad_file(filename):
- exclude_count = 0
- marker_list = []
- sample_dict = {}
- sample_list = []
- geno_rows = []
- with open(filename, 'r') as the_file:
- for i, line in enumerate(the_file):
- if i > 0:
- if line.split(" ")[1] == "no":
- sample_name = line.split(" ")[0]
- sample_list.append(sample_name)
- sample_dict[sample_name] = line.split(" ")[2:]
- else:
- exclude_count += 1
- else:
- marker_list = line.split(" ")[2:]
-
- for i, marker in enumerate(marker_list):
- this_row = []
- this_row.append(marker)
- this_row.append("X")
- this_row.append("Y")
- for sample in sample_list:
- this_row.append(sample_dict[sample][i])
- geno_rows.append(this_row)
-
- print(exclude_count)
-
- return geno_rows
-
- # for i, marker in enumerate(marker_list):
- # this_row = []
- # this_row.append(marker)
- # this_row.append("X")
- # this_row.append("Y")
- # with open(filename, 'r') as the_file:
- # for j, line in enumerate(the_file):
- # if j > 0:
- # this_row.append(line.split(" ")[i+2])
- # print("row: " + str(i))
- # geno_rows.append(this_row)
- #
- # return geno_rows
-
-
-def write_bimbam_files(geno_rows):
- with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh:
- for row in geno_rows:
- geno_fh.write(", ".join(row) + "\n")
-
-
-def convert_dryad_to_bimbam(filename):
- geno_file_rows = read_dryad_file(filename)
- write_bimbam_files(geno_file_rows)
-
-
-if __name__ == "__main__":
- input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
- convert_dryad_to_bimbam(input_filename)
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
deleted file mode 100644
index 078be529..00000000
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/python
-
-"""
-Convert .geno files to json
-
-This file goes through all of the genofiles in the genofile directory (.geno)
-and converts them to json files that are used when running the marker regression
-code
-
-"""
-
-import sys
-sys.path.append("..")
-import os
-import glob
-import traceback
-import gzip
-
-import simplejson as json
-
-from pprint import pformat as pf
-
-
-class EmptyConfigurations(Exception):
- pass
-
-
-class Marker:
- def __init__(self):
- self.name = None
- self.chr = None
- self.cM = None
- self.Mb = None
- self.genotypes = []
-
-
-class ConvertGenoFile:
-
- def __init__(self, input_file, output_files):
- self.input_file = input_file
- self.output_files = output_files
-
- self.mb_exists = False
- self.cm_exists = False
- self.markers = []
-
- self.latest_row_pos = None
- self.latest_col_pos = None
-
- self.latest_row_value = None
- self.latest_col_value = None
-
- def convert(self):
- self.haplotype_notation = {
- '@mat': "1",
- '@pat': "0",
- '@het': "0.5",
- '@unk': "NA"
- }
-
- self.configurations = {}
- self.input_fh = open(self.input_file)
-
- self.process_csv()
-
- def process_csv(self):
- for row in self.process_rows():
- row_items = row.split("\t")
-
- this_marker = Marker()
- this_marker.name = row_items[1]
- this_marker.chr = row_items[0]
- if self.cm_exists and self.mb_exists:
- this_marker.cM = row_items[2]
- this_marker.Mb = row_items[3]
- genotypes = row_items[4:]
- elif self.cm_exists:
- this_marker.cM = row_items[2]
- genotypes = row_items[3:]
- elif self.mb_exists:
- this_marker.Mb = row_items[2]
- genotypes = row_items[3:]
- else:
- genotypes = row_items[2:]
- for item_count, genotype in enumerate(genotypes):
- if genotype.upper().strip() in self.configurations:
- this_marker.genotypes.append(
- self.configurations[genotype.upper().strip()])
- else:
- this_marker.genotypes.append("NA")
-
- self.markers.append(this_marker.__dict__)
-
- self.write_to_bimbam()
-
- def write_to_bimbam(self):
- with open(self.output_files[0], "w") as geno_fh:
- for marker in self.markers:
- geno_fh.write(marker['name'])
- geno_fh.write(", X, Y")
- geno_fh.write(", " + ", ".join(marker['genotypes']))
- geno_fh.write("\n")
-
- with open(self.output_files[1], "w") as pheno_fh:
- for sample in self.sample_list:
- pheno_fh.write("1\n")
-
- with open(self.output_files[2], "w") as snp_fh:
- for marker in self.markers:
- if self.mb_exists:
- snp_fh.write(
- marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
- else:
- snp_fh.write(
- marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
-
- def get_sample_list(self, row_contents):
- self.sample_list = []
- if self.mb_exists:
- if self.cm_exists:
- self.sample_list = row_contents[4:]
- else:
- self.sample_list = row_contents[3:]
- else:
- if self.cm_exists:
- self.sample_list = row_contents[3:]
- else:
- self.sample_list = row_contents[2:]
-
- def process_rows(self):
- for self.latest_row_pos, row in enumerate(self.input_fh):
- self.latest_row_value = row
- # Take care of headers
- if not row.strip():
- continue
- if row.startswith('#'):
- continue
- if row.startswith('Chr'):
- if 'Mb' in row.split():
- self.mb_exists = True
- if 'cM' in row.split():
- self.cm_exists = True
- self.get_sample_list(row.split())
- continue
- if row.startswith('@'):
- key, _separater, value = row.partition(':')
- key = key.strip()
- value = value.strip()
- if key == "@filler":
- raise EmptyConfigurations
- if key in self.haplotype_notation:
- self.configurations[value] = self.haplotype_notation[key]
- continue
- if not len(self.configurations):
- raise EmptyConfigurations
- yield row
-
- @classmethod
- def process_all(cls, old_directory, new_directory):
- os.chdir(old_directory)
- for input_file in glob.glob("*"):
- if not input_file.endswith(('geno', '.geno.gz')):
- continue
- group_name = ".".join(input_file.split('.')[:-1])
- if group_name == "HSNIH-Palmer":
- continue
- geno_output_file = os.path.join(
- new_directory, group_name + "_geno.txt")
- pheno_output_file = os.path.join(
- new_directory, group_name + "_pheno.txt")
- snp_output_file = os.path.join(
- new_directory, group_name + "_snps.txt")
- output_files = [geno_output_file,
- pheno_output_file, snp_output_file]
- print("%s -> %s" % (
- os.path.join(old_directory, input_file), geno_output_file))
- convertob = ConvertGenoFile(input_file, output_files)
- try:
- convertob.convert()
- except EmptyConfigurations as why:
- print(" No config info? Continuing...")
- continue
- except Exception as why:
- print(" Exception:", why)
- print(traceback.print_exc())
- print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
- convertob.latest_col_pos))
- print(" Column is:", convertob.latest_col_value)
- print(" Row is:", convertob.latest_row_value)
- break
-
-
-if __name__ == "__main__":
- Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
- New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
- #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
- #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
- #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
- # convertob.convert()
- ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
- # ConvertGenoFiles(Geno_Directory)
diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py
deleted file mode 100644
index b755c648..00000000
--- a/wqflask/maintenance/gen_ind_genofiles.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/env python3
-"""A script that generates the genotype files for groups of individuals, using an existing strain genotype file as a basis
-
-Example commands:
-python3 gen_ind_genofiles.py
- /home/zas1024/gn2-zach/genotype_files/genotype/
- /home/zas1024/gn2-zach/new_geno/
- BXD-Micturition.geno
- BXD.json
-python3 gen_ind_genofiles.py
- /home/zas1024/gn2-zach/genotype_files/genotype
- /home/zas1024/gn2-zach/new_geno/
- BXD-Micturition.geno
- BXD.2.geno BXD.4.geno BXD.5.geno
-
-"""
-
-import json
-import os
-import sys
-from typing import List
-
-import MySQLdb
-
-def conn():
- return MySQLdb.Connect(db=os.environ.get("DB_NAME"),
- user=os.environ.get("DB_USER"),
- passwd=os.environ.get("DB_PASS"),
- host=os.environ.get("DB_HOST"))
-
-def main(args):
-
- # Directory in which .geno files are located
- geno_dir = args[1]
-
- # Directory in which to output new files
- out_dir = args[2]
-
- # The individuals group that we want to generate a .geno file for
- target_file = geno_dir + args[3]
-
- # The source group(s) we're generating the .geno files from
- # This can be passed as either a specific .geno file (or set of files as multiple arguments),
- # or as a JSON file containing a set of .geno files (and their corresponding file names and sample lists)
- geno_json = {}
- source_files = []
- if ".json" in args[4]:
- geno_json = json.load(open(geno_dir + args[4], "r"))
- par_f1s = {
- "mat": geno_json['mat'],
- "pat": geno_json['pat'],
- "f1s": geno_json['f1s']
- }
-
- # List of file titles and locations from JSON
- source_files = [{'title': genofile['title'], 'location': geno_dir + genofile['location']} for genofile in geno_json['genofile']]
- else:
- par_f1s = {}
- # List of files directly taken from command line arguments, with titles just set to the filename
- for group in args[4:]:
- file_name = geno_dir + group + ".geno" if ".geno" not in group else geno_dir + group
- source_files.append({'title': file_name[:-5], 'location': file_name})
-
- if len(source_files) > 1:
- # Generate a JSON file pointing to the new target genotype files, in situations where there are multiple source .geno files
- target_json_loc = out_dir + ".".join(args[3].split(".")[:-1]) + ".json"
- target_json = {'genofile': []}
-
- # Generate the output .geno files
- for source_file in source_files:
- filename, samples = generate_new_genofile(source_file['location'], target_file, par_f1s, out_dir)
-
- target_json['genofile'].append({
- 'location': filename.split("/")[-1],
- 'title': source_file['title'],
- 'sample_list': samples
- })
-
- json.dump(target_json, open(target_json_loc, "w"))
- else:
- filename, samples = generate_new_genofile(source_files[0]['location'], target_file, par_f1s, out_dir)
-
-def get_strain_for_sample(sample):
- query = (
- "SELECT CaseAttributeXRefNew.Value "
- "FROM CaseAttributeXRefNew, Strain "
- "WHERE CaseAttributeXRefNew.CaseAttributeId=11 "
- "AND CaseAttributeXRefNew.StrainId = Strain.Id "
- "AND Strain.Name = %(name)s" )
-
- with conn().cursor() as cursor:
- cursor.execute(query, {"name": sample.strip()})
- strain = cursor.fetchone()[0]
- return strain
-
-def generate_new_genofile(source_genofile, target_genofile, par_f1s, out_dir):
- source_samples = group_samples(source_genofile)
- source_genotypes = strain_genotypes(source_genofile)
- target_samples = group_samples(target_genofile)
- strain_pos_map = map_strain_pos_to_target_group(source_samples, target_samples, par_f1s)
-
- if len(source_genofile.split("/")[-1].split(".")) > 2:
- # The number in the source genofile; for example 4 in BXD.4.geno
- source_num = source_genofile.split("/")[-1].split(".")[-2]
- target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + "." + source_num + ".geno"
- else:
- target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + ".geno"
-
- file_location = out_dir + target_filename
-
- with open(file_location, "w") as fh:
- for metadata in ["name", "type", "mat", "pat", "het", "unk"]:
- fh.write("@" + metadata + ":" + source_genotypes[metadata] + "\n")
-
- header_line = ["Chr", "Locus", "cM", "Mb"] + target_samples
- fh.write("\t".join(header_line) + "\n")
-
- for marker in source_genotypes['markers']:
- line_items = [
- marker['Chr'],
- marker['Locus'],
- marker['cM'],
- marker['Mb']
- ]
-
- for pos in strain_pos_map:
- if isinstance(pos, int):
- line_items.append(marker['genotypes'][pos])
- else:
- if pos in ["mat", "pat"]:
- line_items.append(source_genotypes[pos])
- elif pos == "f1s":
- line_items.append("H")
- else:
- line_items.append("U")
-
- fh.write("\t".join(line_items) + "\n")
-
- return file_location, target_samples
-
-def map_strain_pos_to_target_group(source_samples, target_samples, par_f1s):
- """
- Retrieve corresponding strain position for each sample in the target group
-
- This is so the genotypes from the base genofile can be mapped to the samples in the target group
-
- For example:
- Base strains: BXD1, BXD2, BXD3
- Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3
- Returns: [0, 0, 1, 2, 2, 2]
- """
- pos_map = []
- for sample in target_samples:
- sample_strain = get_strain_for_sample(sample)
- if sample_strain in source_samples:
- pos_map.append(source_samples.index(sample_strain))
- else:
- val = "U"
- for key in par_f1s.keys():
- if sample_strain in par_f1s[key]:
- val = key
- pos_map.append(val)
-
- return pos_map
-
-def group_samples(target_file: str) -> List:
- """
- Get the group samples from its "dummy" .geno file (which still contains the sample list)
- """
-
- sample_list = []
- with open(target_file, "r") as target_geno:
- for i, line in enumerate(target_geno):
- # Skip header lines
- if line[0] in ["#", "@"] or not len(line):
- continue
-
- line_items = line.split()
-
- sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]]
- break
-
- return sample_list
-
-def strain_genotypes(strain_genofile: str) -> List:
- """
- Read genotypes from source strain .geno file
-
- :param strain_genofile: string of genofile filename
- :return: a list of dictionaries representing each marker's genotypes
-
- Example output: [
- {
- 'Chr': '1',
- 'Locus': 'marker1',
- 'Mb': '10.0',
- 'cM': '8.0',
- 'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...]
- },
- ...
- ]
- """
-
- geno_dict = {}
-
- geno_start_col = None
- header_columns = []
- sample_list = []
- markers = []
- with open(strain_genofile, "r") as source_geno:
- for i, line in enumerate(source_geno):
- if line[0] == "@":
- metadata_type = line[1:].split(":")[0]
- if metadata_type in ['name', 'type', 'mat', 'pat', 'het', 'unk']:
- geno_dict[metadata_type] = line.split(":")[1].strip()
-
- continue
-
- # Skip other header lines
- if line[0] == "#" or not len(line):
- continue
-
- line_items = line.split("\t")
- if "Chr" in line_items: # Header row
- # Get the first column index containing genotypes
- header_columns = line_items
- for j, item in enumerate(line_items):
- if item not in ["Chr", "Locus", "Mb", "cM"]:
- geno_start_col = j
- break
-
- sample_list = line_items[geno_start_col:]
- if not geno_start_col:
- print("Check .geno file - expected columns not found")
- sys.exit()
- else: # Marker rows
- this_marker = {
- 'Chr': line_items[header_columns.index("Chr")],
- 'Locus': line_items[header_columns.index("Locus")],
- 'Mb': line_items[header_columns.index("Mb")],
- 'cM': line_items[header_columns.index("cM")],
- 'genotypes': [item.strip() for item in line_items][geno_start_col:]
- }
-
- markers.append(this_marker)
-
- geno_dict['markers'] = markers
-
- return geno_dict
-
-if __name__ == "__main__":
- main(sys.argv)
-
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
deleted file mode 100644
index 01b2fc15..00000000
--- a/wqflask/maintenance/gen_select_dataset.py
+++ /dev/null
@@ -1,296 +0,0 @@
-"""Script that generates the data for the main dropdown menus on the home page
-
-Writes out data as /static/new/javascript/dataset_menu_structure.json
-It needs to be run manually when database has been changed. Run it as
-
- ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
-
-"""
-
-
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams
-# at rwilliams@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-
-import sys
-
-# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
-sys.path.insert(0, './')
-# NEW: import app to avoid a circular dependency on utility.tools
-from wqflask import app
-
-from utility.tools import get_setting
-
-import simplejson as json
-import urllib.parse
-
-
-from pprint import pformat as pf
-
-from wqflask.database import database_connection
-
-
-def get_species(cursor):
- """Build species list"""
- #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
- cursor.execute("select Name, MenuName from Species order by OrderId")
- species = list(cursor.fetchall())
- return species
-
-
-def get_groups(cursor, species):
- """Build groups list"""
- groups = {}
- for species_name, _species_full_name in species:
- cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet,
- Species,
- ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s'
- and InbredSet.SpeciesId = Species.Id and
- (PublishFreeze.InbredSetId = InbredSet.Id
- or GenoFreeze.InbredSetId = InbredSet.Id
- or ProbeFreeze.InbredSetId = InbredSet.Id)
- group by InbredSet.Name
- order by InbredSet.FullName""" % species_name)
- results = cursor.fetchall()
- groups[species_name] = list(results)
- return groups
-
-
-def get_types(groups):
- """Build types list"""
- types = {}
- #print("Groups: ", pf(groups))
- for species, group_dict in list(groups.items()):
- types[species] = {}
- for group_name, _group_full_name in group_dict:
- # make group an alias to shorten the code
- #types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")]
- if phenotypes_exist(group_name):
- types[species][group_name] = [("Phenotypes", "Phenotypes")]
- if genotypes_exist(group_name):
- if group_name in types[species]:
- types[species][group_name] += [("Genotypes", "Genotypes")]
- else:
- types[species][group_name] = [("Genotypes", "Genotypes")]
- if group_name in types[species]:
- types_list = build_types(species, group_name)
- if len(types_list) > 0:
- types[species][group_name] += types_list
- else:
- if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
- types[species].pop(group_name, None)
- groups[species] = tuple(
- group for group in groups[species] if group[0] != group_name)
- else: # ZS: This whole else statement might be unnecessary, need to check
- types_list = build_types(species, group_name)
- if len(types_list) > 0:
- types[species][group_name] = types_list
- else:
- types[species].pop(group_name, None)
- groups[species] = tuple(
- group for group in groups[species] if group[0] != group_name)
- return types
-
-
-def phenotypes_exist(group_name):
- #print("group_name:", group_name)
- Cursor.execute("""select Name from PublishFreeze
- where PublishFreeze.Name = '%s'""" % (group_name + "Publish"))
-
- results = Cursor.fetchone()
- #print("RESULTS:", results)
-
- if results != None:
- return True
- else:
- return False
-
-
-def genotypes_exist(group_name):
- #print("group_name:", group_name)
- Cursor.execute("""select Name from GenoFreeze
- where GenoFreeze.Name = '%s'""" % (group_name + "Geno"))
-
- results = Cursor.fetchone()
- #print("RESULTS:", results)
-
- if results != None:
- return True
- else:
- return False
-
-
-def build_types(species, group):
- """Fetches tissues
-
- Gets the tissues with data for this species/group
- (all types except phenotype/genotype are tissues)
-
- """
-
- Cursor.execute("""select distinct Tissue.Name
- from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
- where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
- InbredSet.Name = '%s' and
- ProbeFreeze.TissueId = Tissue.Id and
- ProbeFreeze.InbredSetId = InbredSet.Id and
- ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
- ProbeSetFreeze.public > 0 and
- ProbeSetFreeze.confidentiality < 1
- order by Tissue.Name""" % (species, group))
-
- results = []
- for result in Cursor.fetchall():
- if len(result):
- these_datasets = build_datasets(species, group, result[0])
- if len(these_datasets) > 0:
- results.append((result[0], result[0]))
-
- return results
-
-
-def get_datasets(types):
- """Build datasets list"""
- datasets = {}
- for species, group_dict in list(types.items()):
- datasets[species] = {}
- for group, type_list in list(group_dict.items()):
- datasets[species][group] = {}
- for type_name in type_list:
- these_datasets = build_datasets(species, group, type_name[0])
- if len(these_datasets) > 0:
- datasets[species][group][type_name[0]] = these_datasets
-
- return datasets
-
-
-def build_datasets(species, group, type_name):
- """Gets dataset names from database"""
- dataset_text = dataset_value = None
- datasets = []
- if type_name == "Phenotypes":
- Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where
- InbredSet.Name = '%s' and
- PublishFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = PublishFreeze.Name order by
- PublishFreeze.CreateTime asc""" % group)
-
- results = Cursor.fetchall()
- if len(results) > 0:
- for result in results:
- print(result)
- dataset_id = str(result[0])
- dataset_value = str(result[1])
- if group == 'MDP':
- dataset_text = "Mouse Phenome Database"
- else:
- #dataset_text = "%s Phenotypes" % group
- dataset_text = str(result[2])
- datasets.append((dataset_id, dataset_value, dataset_text))
- else:
- dataset_id = "None"
- dataset_value = "%sPublish" % group
- dataset_text = "%s Phenotypes" % group
- datasets.append((dataset_id, dataset_value, dataset_text))
-
- elif type_name == "Genotypes":
- Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
- InbredSet.Name = '%s' and
- GenoFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = GenoFreeze.ShortName and
- GenoFreeze.public > 0 and
- GenoFreeze.confidentiality < 1 order by
- GenoFreeze.CreateTime desc""" % group)
-
- results = Cursor.fetchone()
- if results != None:
- dataset_id = str(results[0])
- else:
- dataset_id = "None"
- dataset_value = "%sGeno" % group
- dataset_text = "%s Genotypes" % group
- datasets.append((dataset_id, dataset_value, dataset_text))
-
- else: # for mRNA expression/ProbeSet
- Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
- ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
- Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
- InbredSet.Name = '%s' and
- ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and
- ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and
- ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by
- ProbeSetFreeze.CreateTime desc""" % (species, group, type_name))
-
- dataset_results = Cursor.fetchall()
- datasets = []
- for dataset_info in dataset_results:
- this_dataset_info = []
- for info in dataset_info:
- this_dataset_info.append(str(info))
- datasets.append(this_dataset_info)
-
- return datasets
-
-
-def main(cursor):
- """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
-
- species = get_species(cursor)
- groups = get_groups(cursor, species)
- types = get_types(groups)
- datasets = get_datasets(types)
-
- #species.append(('All Species', 'All Species'))
- #groups['All Species'] = [('All Groups', 'All Groups')]
- #types['All Species'] = {}
- #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')]
- #datasets['All Species'] = {}
- #datasets['All Species']['All Groups'] = {}
- #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')]
-
- data = dict(species=species,
- groups=groups,
- types=types,
- datasets=datasets,
- )
-
- #print("data:", data)
-
- output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json"""
-
- with open(output_file, 'w') as fh:
- json.dump(data, fh, indent=" ", sort_keys=True)
-
- #print("\nWrote file to:", output_file)
-
-
-def _test_it():
- """Used for internal testing only"""
- types = build_types("Mouse", "BXD")
- #print("build_types:", pf(types))
- datasets = build_datasets("Mouse", "BXD", "Hippocampus")
- #print("build_datasets:", pf(datasets))
-
-
-if __name__ == '__main__':
- with database_connection(get_setting("SQL_URI")) as conn:
- with conn.cursor() as cursor:
- main(cursor)
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
deleted file mode 100644
index 9f01d094..00000000
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/python
-
-"""
-Generate relatedness matrix files for GEMMA from BIMBAM genotype/phenotype files
-
-This file goes through all of the BIMBAM files in the bimbam directory
-and uses GEMMA to generate their corresponding kinship/relatedness matrix file
-
-"""
-
-import sys
-sys.path.append("..")
-import os
-import glob
-
-
-class GenerateKinshipMatrices:
- def __init__(self, group_name, geno_file, pheno_file):
- self.group_name = group_name
- self.geno_file = geno_file
- self.pheno_file = pheno_file
-
- def generate_kinship(self):
- gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \
- " -p " + self.pheno_file + \
- " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
- print("command:", gemma_command)
- os.system(gemma_command)
-
- @classmethod
- def process_all(self, geno_dir, bimbam_dir):
- os.chdir(geno_dir)
- for input_file in glob.glob("*"):
- if not input_file.endswith(('geno', '.geno.gz')):
- continue
- group_name = ".".join(input_file.split('.')[:-1])
- if group_name == "HSNIH-Palmer":
- continue
- geno_input_file = os.path.join(
- bimbam_dir, group_name + "_geno.txt")
- pheno_input_file = os.path.join(
- bimbam_dir, group_name + "_pheno.txt")
- convertob = GenerateKinshipMatrices(
- group_name, geno_input_file, pheno_input_file)
- try:
- convertob.generate_kinship()
- except EmptyConfigurations as why:
- print(" No config info? Continuing...")
- continue
- except Exception as why:
-
- print(" Exception:", why)
- print(traceback.print_exc())
- print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
- convertob.latest_col_pos))
- print(" Column is:", convertob.latest_col_value)
- print(" Row is:", convertob.latest_row_value)
- break
-
-
-if __name__ == "__main__":
- Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/"""
- Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/"""
- GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
-
- # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py
deleted file mode 100644
index 2f917c71..00000000
--- a/wqflask/maintenance/generate_probesetfreeze_file.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-# sys.path.insert(0, "..") - why?
-
-import os
-import collections
-import csv
-
-from base import webqtlConfig
-
-from pprint import pformat as pf
-
-from utility.tools import get_setting
-from wqflask.database import database_connection
-
-
-def show_progress(process, counter):
- if counter % 1000 == 0:
- print("{}: {}".format(process, counter))
-
-
-def get_strains(cursor):
- cursor.execute("""select Strain.Name
- from Strain, StrainXRef, InbredSet
- where Strain.Id = StrainXRef.StrainId and
- StrainXRef.InbredSetId = InbredSet.Id
- and InbredSet.Name=%s;
- """, "BXD")
-
- strains = [strain[0] for strain in cursor.fetchall()]
- print("strains:", pf(strains))
- for strain in strains:
- print(" -", strain)
-
- return strains
-
-
-def get_probeset_vals(cursor, dataset_name):
- cursor.execute(""" select ProbeSet.Id, ProbeSet.Name
- from ProbeSetXRef,
- ProbeSetFreeze,
- ProbeSet
- where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and
- ProbeSetFreeze.Name = %s and
- ProbeSetXRef.ProbeSetId = ProbeSet.Id;
- """, dataset_name)
-
- probesets = cursor.fetchall()
-
- print("Fetched probesets")
-
- probeset_vals = collections.OrderedDict()
-
- for counter, probeset in enumerate(probesets):
- cursor.execute(""" select Strain.Name, ProbeSetData.value
- from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain
- where ProbeSetData.Id = ProbeSetXRef.DataId
- and ProbeSetData.StrainId = Strain.Id
- and ProbeSetXRef.ProbeSetId = %s
- and ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId
- and ProbeSetFreeze.Name = %s;
- """, (probeset[0], dataset_name))
- val_dic = collections.OrderedDict()
- vals = cursor.fetchall()
- for val in vals:
- val_dic[val[0]] = val[1]
-
- probeset_vals[probeset[1]] = val_dic
- show_progress("Querying DB", counter)
-
- return probeset_vals
-
-
-def trim_strains(strains, probeset_vals):
- trimmed_strains = []
- #print("probeset_vals is:", pf(probeset_vals))
- first_probeset = list(probeset_vals.values())[0]
- print("\n**** first_probeset is:", pf(first_probeset))
- for strain in strains:
- print("\n**** strain is:", pf(strain))
- if strain in first_probeset:
- trimmed_strains.append(strain)
- print("trimmed_strains:", pf(trimmed_strains))
- return trimmed_strains
-
-
-def write_data_matrix_file(strains, probeset_vals, filename):
- with open(filename, "wb") as fh:
- csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL)
- #print("strains is:", pf(strains))
- csv_writer.writerow(['ID'] + strains)
- for counter, probeset in enumerate(probeset_vals):
- row_data = [probeset]
- for strain in strains:
- #print("probeset is: ", pf(probeset_vals[probeset]))
- row_data.append(probeset_vals[probeset][strain])
- #print("row_data is: ", pf(row_data))
- csv_writer.writerow(row_data)
- show_progress("Writing", counter)
-
-
-def main():
- filename = os.path.expanduser(
- "~/gene/wqflask/maintenance/"
- "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2"
- "(Oct08)_RankInv_Beta.txt")
- dataset_name = "Eye_AXBXA_1008_RankInv"
-
- with database_connection(get_setting("SQL_URI")) as conn:
- with conn.cursor() as cursor:
- strains = get_strains(cursor)
- print("Getting probset_vals")
- probeset_vals = get_probeset_vals(cursor, dataset_name)
- print("Finished getting probeset_vals")
- trimmed_strains = trim_strains(strains, probeset_vals)
- write_data_matrix_file(trimmed_strains, probeset_vals, filename)
-
-
-if __name__ == '__main__':
- main()
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
deleted file mode 100644
index 32e0e34b..00000000
--- a/wqflask/maintenance/geno_to_json.py
+++ /dev/null
@@ -1,196 +0,0 @@
-#!/usr/bin/python
-
-"""
-Convert .geno files to json
-
-This file goes through all of the genofiles in the genofile directory (.geno)
-and converts them to json files that are used when running the marker regression
-code
-
-"""
-
-import sys
-sys.path.append("..")
-import os
-import glob
-import traceback
-import gzip
-
-#import numpy as np
-#from pyLMM import lmm
-
-import simplejson as json
-
-from pprint import pformat as pf
-
-#from utility.tools import flat_files
-
-
class EmptyConfigurations(Exception):
    """Raised when a data row is reached before any known ``@`` line was read."""


class Marker:
    """Plain record for one marker row; its ``__dict__`` is what gets dumped."""

    def __init__(self):
        self.name = None
        self.chr = None
        self.cM = None
        self.Mb = None
        self.genotypes = []


class ConvertGenoFile:
    """Convert one tab-separated ``.geno`` file into a JSON list of markers.

    The input may contain ``#`` comment lines, ``@key:value`` lines, one
    header line starting with ``Chr`` and then one row per marker
    (chromosome, marker name, optional cM / Mb columns, genotypes).
    ``@mat``/``@pat``/``@het``/``@unk`` lines define which genotype symbol
    maps to which output code; any other symbol is written as ``"NA"``.
    """

    # Output code written for the symbol declared by each ``@`` header key.
    haplotype_notation = {
        '@mat': "1",
        '@pat': "0",
        '@het': "0.5",
        '@unk': "NA"
    }

    def __init__(self, input_file, output_file):
        self.input_file = input_file
        self.output_file = output_file

        self.mb_exists = False
        self.cm_exists = False
        self.markers = []
        self.configurations = {}
        self.input_fh = None

        # Position/value of the last row and genotype column touched; these
        # are only used for error reporting in process_all().
        self.latest_row_pos = None
        self.latest_col_pos = None

        self.latest_row_value = None
        self.latest_col_value = None

    def convert(self):
        """Read ``input_file`` and write the marker list to ``output_file``.

        Raises:
            EmptyConfigurations: a data row was found but no ``@mat``/
                ``@pat``/``@het``/``@unk`` line preceded it.  Nothing is
                written in that case.
        """
        # Reset per-run state so calling convert() twice does not duplicate
        # markers or reuse a previous file's header information.
        self.configurations = {}
        self.markers = []
        self.mb_exists = False
        self.cm_exists = False

        # process_all() also accepts ``.geno.gz``; those need gzip in text
        # mode.  The context manager guarantees the handle is closed.
        opener = gzip.open if self.input_file.endswith(".gz") else open
        with opener(self.input_file, "rt") as self.input_fh:
            self.process_csv()

    def process_csv(self):
        """Build ``self.markers`` from the data rows and dump them as JSON."""
        for row in self.process_rows():
            # Drop the line terminator first; otherwise it stays glued to the
            # last genotype, which then never matches and is always "NA".
            row_items = row.rstrip("\r\n").split("\t")

            this_marker = Marker()
            this_marker.chr = row_items[0]
            this_marker.name = row_items[1]

            # Optional position columns sit between the name and the
            # genotypes: cM first (if present), then Mb (if present).
            first_genotype_col = 2
            if self.cm_exists:
                this_marker.cM = row_items[first_genotype_col]
                first_genotype_col += 1
            if self.mb_exists:
                this_marker.Mb = row_items[first_genotype_col]
                first_genotype_col += 1

            for offset, genotype in enumerate(row_items[first_genotype_col:]):
                self.latest_col_pos = first_genotype_col + offset
                self.latest_col_value = genotype
                this_marker.genotypes.append(
                    self.configurations.get(genotype.strip().upper(), "NA"))

            self.markers.append(this_marker.__dict__)

        # Written only after every row parsed, so a failing conversion does
        # not leave an empty or truncated JSON file behind.
        with open(self.output_file, 'w') as fh:
            json.dump(self.markers, fh, indent=" ", sort_keys=True)

    def process_rows(self):
        """Yield the raw data rows of ``self.input_fh``.

        Blank lines and ``#`` comments are skipped.  The ``Chr`` header line
        sets ``mb_exists`` / ``cm_exists``; ``@`` lines fill
        ``self.configurations``.  ``latest_row_pos`` / ``latest_row_value``
        track the line being read.

        Raises:
            EmptyConfigurations: a data row is reached while
                ``self.configurations`` is still empty.
        """
        for self.latest_row_pos, row in enumerate(self.input_fh):
            self.latest_row_value = row
            if not row.strip():
                continue
            if row.startswith('#'):
                continue
            if row.startswith('Chr'):
                # Header line: note which optional position columns exist.
                columns = row.split()
                if 'Mb' in columns:
                    self.mb_exists = True
                if 'cM' in columns:
                    self.cm_exists = True
                continue
            if row.startswith('@'):
                key, _separator, value = row.partition(':')
                key = key.strip()
                value = value.strip()
                if key in self.haplotype_notation:
                    # Stored upper-cased because lookups upper-case the
                    # genotype symbol.
                    self.configurations[value.upper()] = \
                        self.haplotype_notation[key]
                continue
            if not self.configurations:
                raise EmptyConfigurations
            yield row

    @classmethod
    def process_all(cls, old_directory, new_directory):
        """Convert every ``*.geno`` / ``*.geno.gz`` file in ``old_directory``.

        The process changes its working directory to ``old_directory`` (as
        the original implementation did), so a relative ``new_directory`` is
        resolved against it.  ``X.geno`` and ``X.geno.gz`` are both written
        to ``new_directory/X.json``.  Files without notation lines are
        skipped; any other error is reported and stops the run.
        """
        os.chdir(old_directory)
        os.makedirs(new_directory, exist_ok=True)
        for input_file in sorted(glob.glob("*")):
            if input_file.endswith(".geno.gz"):
                group_name = input_file[:-len(".geno.gz")]
            elif input_file.endswith(".geno"):
                group_name = input_file[:-len(".geno")]
            else:
                continue
            output_file = os.path.join(new_directory, group_name + ".json")
            print("%s -> %s" % (
                os.path.join(old_directory, input_file), output_file))
            convertob = ConvertGenoFile(input_file, output_file)
            try:
                convertob.convert()
            except EmptyConfigurations:
                print("  No config info? Continuing...")
                continue
            except Exception as why:
                print("  Exception:", why)
                # print_exc() writes the traceback itself and returns None,
                # so its result must not be printed.
                traceback.print_exc()
                print("    Found in row %s at tabular column %s" % (
                    convertob.latest_row_pos, convertob.latest_col_pos))
                print("    Column is:", convertob.latest_col_value)
                print("    Row is:", convertob.latest_row_value)
                break
-
-
if __name__ == "__main__":
    # Source directory holding the .geno files and the directory that
    # receives the generated .json files.
    Old_Geno_Directory = "/export/local/home/zas1024/gn2-zach/genotype_files/genotype"
    New_Geno_Directory = "/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"
    ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
deleted file mode 100644
index 0a450d3f..00000000
--- a/wqflask/maintenance/get_group_samplelists.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import os
-import glob
-import gzip
-
-from base import webqtlConfig
-
-
def get_samplelist(file_type, geno_file):
    """Return the sample names of ``geno_file`` for the given ``file_type``.

    ``"geno"`` and ``"plink"`` are delegated to the matching reader; any
    other ``file_type`` returns ``None``.
    """
    if file_type == "plink":
        return get_samplelist_from_plink(geno_file)
    if file_type == "geno":
        return get_samplelist_from_geno(geno_file)
    return None
-
-
def get_samplelist_from_geno(genofilename):
    """Return the sample names from the header line of a ``.geno`` file.

    If ``genofilename + '.gz'`` exists it is read instead of the plain file.
    The header is the first line that is neither blank nor starts with ``#``
    or ``@``.  Samples start at the fifth column when the fourth column is
    ``Mb`` and at the fourth column otherwise.

    Returns:
        List of sample names; empty if the file has no header line or the
        header has no sample columns.
    """
    if os.path.isfile(genofilename + '.gz'):
        genofilename += '.gz'
        opener = gzip.open
    else:
        opener = open

    headers = []
    # "rt" matters for gzip.open, whose default mode is binary: bytes lines
    # would make the str-based startswith()/split() below raise TypeError.
    with opener(genofilename, "rt") as genofile:
        for line in genofile:
            line = line.strip()
            if not line or line.startswith(("#", "@")):
                continue
            headers = line.split("\t")
            break

    first_sample_col = 4 if len(headers) > 3 and headers[3] == "Mb" else 3
    return headers[first_sample_col:]
-
-
def get_samplelist_from_plink(genofilename):
    """Return the second whitespace-separated field of every line.

    Lines with fewer than two fields (e.g. blank lines) are skipped instead
    of raising IndexError.  Splitting on any whitespace also accepts
    tab-delimited files and repeated spaces.
    """
    samplelist = []
    with open(genofilename) as genofile:
        for line in genofile:
            fields = line.split()
            if len(fields) < 2:
                continue
            samplelist.append(fields[1])
    return samplelist
diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py
deleted file mode 100644
index 9d12da8a..00000000
--- a/wqflask/maintenance/print_benchmark.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/python
-
-import time
-
-from pprint import pformat as pf
-
-
class TheCounter:
    """Time how long it takes to call ``print_it`` for a range of integers.

    Instantiating a subclass runs the loop immediately and records the
    elapsed seconds both on the instance (``time_took``) and in the shared
    ``Counters`` dict under the subclass name.
    """

    # Elapsed seconds per subclass name, shared by all subclasses.
    Counters = {}

    def __init__(self, iterations=170000):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted.
        start_time = time.perf_counter()
        for counter in range(iterations):
            self.print_it(counter)
        self.time_took = time.perf_counter() - start_time
        TheCounter.Counters[self.__class__.__name__] = self.time_took

    def print_it(self, counter):
        """Handle one counter value; subclasses must override this."""
        raise NotImplementedError
-
-
class PrintAll(TheCounter):
    """Benchmark variant that prints every counter value."""

    def print_it(self, counter):
        print(counter)
-
-
class PrintSome(TheCounter):
    """Benchmark variant that prints only every 1000th counter value."""

    def print_it(self, counter):
        if not counter % 1000:
            print(counter)
-
-
class PrintNone(TheCounter):
    """Benchmark variant that prints nothing (baseline loop cost)."""

    def print_it(self, counter):
        return None
-
-
def new_main():
    """Run each benchmark variant once, then print the recorded timings."""
    print("Running new_main")
    for benchmark in (PrintAll, PrintSome, PrintNone):
        benchmark()

    print(pf(TheCounter.Counters))


if __name__ == '__main__':
    new_main()
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
deleted file mode 100644
index 36049a82..00000000
--- a/wqflask/maintenance/quantile_normalize.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import sys
-sys.path.insert(0, './')
-import urllib.parse
-
-import numpy as np
-import pandas as pd
-
-from flask import Flask, g, request
-
-from wqflask import app
-from wqflask.database import database_connection
-from utility.tools import get_setting
-
-
def create_dataframe(input_file):
    """Load the numeric part of a tab-separated matrix file as a DataFrame.

    The first line is a header and the first column holds row labels; both
    are dropped.  The result has one row per data line and one column per
    remaining header field, with default integer labels.
    """
    with open(input_file) as f:
        ncols = len(f.readline().split("\t"))

    # Passing the path lets numpy open and close the file itself (the
    # previous open(..., "rb") handle was never closed).  ndmin=2 keeps a
    # file with a single data row as shape (1, n) instead of a 1-D array
    # that DataFrame would turn into a column.
    input_array = np.loadtxt(
        input_file, delimiter="\t", skiprows=1,
        usecols=list(range(1, ncols)), ndmin=2)
    return pd.DataFrame(input_array)
-
-# This function taken from https://github.com/ShawnLYU/Quantile_Normalize
-
-
def quantileNormalize(df_input):
    """Return a quantile-normalised copy of ``df_input``.

    Each column is sorted independently; the row-wise mean of the sorted
    columns gives one reference value per rank.  Every original value is
    then replaced by the reference value at the position where it would be
    inserted (leftmost) into its own sorted column.  ``df_input`` itself is
    not modified.
    """
    normalized = df_input.copy()

    # Reference distribution: mean of the k-th smallest value of each column.
    sorted_columns = pd.DataFrame(
        {label: sorted(normalized[label]) for label in normalized})
    rank_means = sorted_columns.mean(axis=1).tolist()

    # Map every value to the reference value of its rank within the column.
    for label in normalized:
        column = normalized[label]
        positions = np.searchsorted(np.sort(column), column)
        normalized[label] = [rank_means[position] for position in positions]
    return normalized
-
-
def set_data(cursor, dataset_name, data_dir="/home/zas1024/cfw_data"):
    """Yield one index document per trait of ``dataset_name``.

    Reads ``<data_dir>/<dataset_name>.txt`` (original values) and
    ``<data_dir>/quant_norm.csv`` (normalised values) line by line in
    lockstep.  Both are split on tabs; the first line holds the sample
    names and the first field of every other line is the trait name.  For
    each trait the species, group and dataset full name are looked up
    through ``cursor``.

    Args:
        cursor: DB-API cursor using the ``%s`` parameter style.
        dataset_name: value matched against ``ProbeSetFreeze.Name``; also
            the stem of the input file name.
        data_dir: directory holding both input files.

    Yields:
        dict with ``_index``, ``_type`` and ``_source`` keys.

    Raises:
        LookupError: the database has no row for a trait of this dataset.
    """
    import os

    orig_file = os.path.join(data_dir, dataset_name + ".txt")
    quant_file = os.path.join(data_dir, "quant_norm.csv")

    # Values are passed as parameters instead of being interpolated into the
    # SQL text, so quotes in a dataset or trait name cannot break the query.
    query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName
               FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet
               WHERE Species.Id = InbredSet.SpeciesId and
                     InbredSet.Id = ProbeFreeze.InbredSetId and
                     ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and
                     ProbeSetFreeze.Name = %s and
                     ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
                     ProbeSetXRef.ProbeSetId = ProbeSet.Id and
                     ProbeSet.Name = %s"""

    sample_names = []
    with open(orig_file, 'r') as orig_fh, open(quant_file, 'r') as quant_fh:
        for row_number, (line1, line2) in enumerate(zip(orig_fh, quant_fh)):
            # Strip the line terminator so it does not end up inside the
            # last sample name / value of every row.
            orig_fields = line1.rstrip("\r\n").split('\t')
            if row_number == 0:
                sample_names = orig_fields[1:]
                continue

            qnorm_fields = line2.rstrip("\r\n").split('\t')
            trait_name = orig_fields[0]
            sample_list = [
                {
                    "name": sample,
                    "value": orig_fields[column],
                    "qnorm": qnorm_fields[column]
                }
                for column, sample in enumerate(sample_names, start=1)
            ]

            cursor.execute(query, (dataset_name, trait_name))
            result_info = cursor.fetchone()
            if result_info is None:
                raise LookupError(
                    "No database entry for trait %r in dataset %r"
                    % (trait_name, dataset_name))

            yield {
                "_index": "traits",
                "_type": "trait",
                "_source": {
                    "name": trait_name,
                    "species": result_info[0],
                    "group": result_info[1],
                    "dataset": dataset_name,
                    "dataset_fullname": result_info[2],
                    "samples": sample_list,
                    "transform_types": "qnorm"
                }
            }
-
-
if __name__ == '__main__':
    # NOTE(review): `es` and `bulk` are used below but never defined or
    # imported in this file, so this script fails with NameError as written.
    # NOTE(review): the search/print are assumed to run inside the cursor
    # block; the original indentation could not be confirmed.
    with database_connection(get_setting("SQL_URI")) as conn, \
            conn.cursor() as cursor:
        # Index every trait document of the dataset named on the command line.
        success, _ = bulk(es, set_data(cursor, sys.argv[1]))

        # Look one trait up again and show the result.
        search_body = {
            "query": {"match": {"name": "ENSMUSG00000028982"}}
        }
        response = es.search(
            index="traits", doc_type="trait", body=search_body)

        print(response)
diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py
deleted file mode 100644
index cebe33c0..00000000
--- a/wqflask/maintenance/set_resource_defaults.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-
-Script that sets default resource access masks for use with the DB proxy
-
-Defaults will be:
-Owner - omni_gn
-Mask - Public/non-confidential: { data: "view",
- metadata: "view",
- admin: "not-admin" }
- Private/confidential: { data: "no-access",
- metadata: "no-access",
- admin: "not-admin" }
-
-To run:
-./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/set_resource_defaults.py
-
-"""
-
-import sys
-import json
-
-# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
-sys.path.insert(0, './')
-
-# NEW: import app to avoid a circular dependency on utility.tools
-from wqflask import app
-
-from utility import hmac
-from utility.tools import get_setting
-from utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info
-Redis = get_redis_conn()
-
-import urllib.parse
-
-from wqflask.database import database_connection
-
-
def insert_probeset_resources(cursor, default_owner_id):
    """Register one "dataset-probeset" resource per ProbeSetFreeze row.

    A dataset gets the "view" default mask for data and metadata when its
    ``confidentiality`` is below 1 and its ``public`` flag is above 0;
    otherwise both are "no-access".  ``admin`` is always "not-admin".

    Args:
        cursor: database cursor used to read ProbeSetFreeze.
        default_owner_id: owner id stored on every resource.
    """
    # The previous Redis.hgetall("resources") result was never used, so the
    # extra round trip has been dropped.
    cursor.execute(""" SELECT
                            ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public
                        FROM
                            ProbeSetFreeze""")

    for dataset_id, name, confidentiality, public in cursor.fetchall():
        access = "view" if confidentiality < 1 and public > 0 else "no-access"
        resource_ob = {
            'name': name,
            'owner_id': default_owner_id,
            'data': {"dataset": str(dataset_id)},
            'type': "dataset-probeset",
            'default_mask': {"data": access,
                             "metadata": access,
                             "admin": "not-admin"},
            'group_masks': {},
        }

        add_resource(resource_ob, update=False)
-
-
def insert_publish_resources(cursor, default_owner_id):
    """Register one "dataset-publish" resource per published trait.

    Rows whose ``InbredSetCode`` is empty or NULL are skipped.  Every
    resource is named ``<InbredSetCode>_<PublishXRef.Id>`` and gets the
    "view" default mask for data and metadata.

    Args:
        cursor: database cursor used to read the publish tables.
        default_owner_id: owner id stored on every resource.
    """
    # The previous Redis.hgetall("resources") result was never used, so the
    # extra round trip has been dropped.
    cursor.execute(""" SELECT
                            PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode
                        FROM
                            PublishXRef, PublishFreeze, InbredSet, Publication
                        WHERE
                            PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
                            InbredSet.Id = PublishXRef.InbredSetId AND
                            Publication.Id = PublishXRef.PublicationId""")

    for trait_id, dataset_id, inbredset_code in cursor.fetchall():
        # The old fallback name (no code prefix) sat behind this same check
        # and could never be reached, so it has been removed.
        if not inbredset_code:
            continue

        resource_ob = {
            'name': inbredset_code + "_" + str(trait_id),
            'owner_id': default_owner_id,
            'data': {"dataset": str(dataset_id),
                     "trait": str(trait_id)},
            'type': "dataset-publish",
            'default_mask': {"data": "view",
                             "metadata": "view",
                             "admin": "not-admin"},
            'group_masks': {},
        }

        add_resource(resource_ob, update=False)
-
-
def insert_geno_resources(cursor, default_owner_id):
    """Register one "dataset-geno" resource per GenoFreeze row.

    A dataset gets the "view" default mask for data and metadata when its
    ``confidentiality`` is below 1, otherwise "no-access".  The dataset
    whose short name is "HET3-ITPGeno" always gets a fixed owner id instead
    of ``default_owner_id``.

    Args:
        cursor: database cursor used to read GenoFreeze.
        default_owner_id: owner id stored on every other resource.
    """
    # The previous Redis.hgetall("resources") result was never used, so the
    # extra round trip has been dropped.
    cursor.execute(""" SELECT
                            GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality
                        FROM
                            GenoFreeze""")

    for dataset_id, short_name, confidentiality in cursor.fetchall():
        if short_name == "HET3-ITPGeno":
            owner_id = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"
        else:
            owner_id = default_owner_id
        access = "view" if confidentiality < 1 else "no-access"

        resource_ob = {
            'name': short_name,
            'owner_id': owner_id,
            'data': {"dataset": str(dataset_id)},
            'type': "dataset-geno",
            'default_mask': {"data": access,
                             "metadata": access,
                             "admin": "not-admin"},
            'group_masks': {},
        }

        add_resource(resource_ob, update=False)
-
-
def insert_resources(default_owner_id, cursor=None):
    """Insert the publish, geno and probeset resources, in that order.

    Args:
        default_owner_id: owner id passed on to every insert function.
        cursor: database cursor to use.  When omitted, the module-level
            ``cursor`` created by the ``__main__`` block is used, which is
            what the function silently relied on before.

    Raises:
        ValueError: no cursor was passed and no module-level one exists.
    """
    if cursor is None:
        # Legacy call style: fall back to the global set by the script block.
        cursor = globals().get("cursor")
        if cursor is None:
            raise ValueError("insert_resources() needs a database cursor")

    print("START")
    insert_publish_resources(cursor, default_owner_id)
    print("AFTER PUBLISH")
    insert_geno_resources(cursor, default_owner_id)
    print("AFTER GENO")
    insert_probeset_resources(cursor, default_owner_id)
    print("AFTER PROBESET")


def main(cursor):
    """Delete the Redis "resources" key and re-insert the default resources."""

    Redis.delete("resources")

    owner_id = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"

    # Pass the cursor explicitly rather than relying on a global of that name.
    insert_resources(owner_id, cursor=cursor)


if __name__ == '__main__':
    with database_connection(get_setting("SQL_URI")) as conn:
        with conn.cursor() as cursor:
            main(cursor)