about summary refs log tree commit diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/README.md4
-rw-r--r--wqflask/maintenance/__init__.py0
-rw-r--r--wqflask/maintenance/convert_dryad_to_bimbam.py72
-rw-r--r--wqflask/maintenance/convert_geno_to_bimbam.py201
-rw-r--r--wqflask/maintenance/gen_ind_genofiles.py253
-rw-r--r--wqflask/maintenance/gen_select_dataset.py296
-rw-r--r--wqflask/maintenance/generate_kinship_from_bimbam.py66
-rw-r--r--wqflask/maintenance/generate_probesetfreeze_file.py122
-rw-r--r--wqflask/maintenance/geno_to_json.py196
-rw-r--r--wqflask/maintenance/get_group_samplelists.py47
-rw-r--r--wqflask/maintenance/print_benchmark.py45
-rw-r--r--wqflask/maintenance/quantile_normalize.py98
-rw-r--r--wqflask/maintenance/set_resource_defaults.py153
13 files changed, 0 insertions, 1553 deletions
diff --git a/wqflask/maintenance/README.md b/wqflask/maintenance/README.md
deleted file mode 100644
index 873eaa32..00000000
--- a/wqflask/maintenance/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-Maintenance files have been moved into a separate repository named
-*gn_extra*. See https://github.com/genenetwork/gn_extra
-
-
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/wqflask/maintenance/__init__.py
+++ /dev/null
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
deleted file mode 100644
index 18fbb8a1..00000000
--- a/wqflask/maintenance/convert_dryad_to_bimbam.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/python
-
-"""
-Convert data dryad files to a BIMBAM _geno and _snps file
-
-
-"""
-
-import sys
-sys.path.append("..")
-
-
-def read_dryad_file(filename):
-    exclude_count = 0
-    marker_list = []
-    sample_dict = {}
-    sample_list = []
-    geno_rows = []
-    with open(filename, 'r') as the_file:
-        for i, line in enumerate(the_file):
-            if i > 0:
-                if line.split(" ")[1] == "no":
-                    sample_name = line.split(" ")[0]
-                    sample_list.append(sample_name)
-                    sample_dict[sample_name] = line.split(" ")[2:]
-                else:
-                    exclude_count += 1
-            else:
-                marker_list = line.split(" ")[2:]
-
-    for i, marker in enumerate(marker_list):
-        this_row = []
-        this_row.append(marker)
-        this_row.append("X")
-        this_row.append("Y")
-        for sample in sample_list:
-            this_row.append(sample_dict[sample][i])
-        geno_rows.append(this_row)
-
-    print(exclude_count)
-
-    return geno_rows
-
-    # for i, marker in enumerate(marker_list):
-    #    this_row = []
-    #    this_row.append(marker)
-    #    this_row.append("X")
-    #    this_row.append("Y")
-    #    with open(filename, 'r') as the_file:
-    #        for j, line in enumerate(the_file):
-    #            if j > 0:
-    #                this_row.append(line.split(" ")[i+2])
-    #        print("row: " + str(i))
-    #        geno_rows.append(this_row)
-    #
-    # return geno_rows
-
-
-def write_bimbam_files(geno_rows):
-    with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh:
-        for row in geno_rows:
-            geno_fh.write(", ".join(row) + "\n")
-
-
-def convert_dryad_to_bimbam(filename):
-    geno_file_rows = read_dryad_file(filename)
-    write_bimbam_files(geno_file_rows)
-
-
-if __name__ == "__main__":
-    input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
-    convert_dryad_to_bimbam(input_filename)
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
deleted file mode 100644
index 078be529..00000000
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/python
-
-"""
-Convert .geno files to json
-
-This file goes through all of the genofiles in the genofile directory (.geno)
-and converts them to json files that are used when running the marker regression
-code
-
-"""
-
-import sys
-sys.path.append("..")
-import os
-import glob
-import traceback
-import gzip
-
-import simplejson as json
-
-from pprint import pformat as pf
-
-
-class EmptyConfigurations(Exception):
-    pass
-
-
-class Marker:
-    def __init__(self):
-        self.name = None
-        self.chr = None
-        self.cM = None
-        self.Mb = None
-        self.genotypes = []
-
-
-class ConvertGenoFile:
-
-    def __init__(self, input_file, output_files):
-        self.input_file = input_file
-        self.output_files = output_files
-
-        self.mb_exists = False
-        self.cm_exists = False
-        self.markers = []
-
-        self.latest_row_pos = None
-        self.latest_col_pos = None
-
-        self.latest_row_value = None
-        self.latest_col_value = None
-
-    def convert(self):
-        self.haplotype_notation = {
-            '@mat': "1",
-            '@pat': "0",
-            '@het': "0.5",
-            '@unk': "NA"
-        }
-
-        self.configurations = {}
-        self.input_fh = open(self.input_file)
-
-        self.process_csv()
-
-    def process_csv(self):
-        for row in self.process_rows():
-            row_items = row.split("\t")
-
-            this_marker = Marker()
-            this_marker.name = row_items[1]
-            this_marker.chr = row_items[0]
-            if self.cm_exists and self.mb_exists:
-                this_marker.cM = row_items[2]
-                this_marker.Mb = row_items[3]
-                genotypes = row_items[4:]
-            elif self.cm_exists:
-                this_marker.cM = row_items[2]
-                genotypes = row_items[3:]
-            elif self.mb_exists:
-                this_marker.Mb = row_items[2]
-                genotypes = row_items[3:]
-            else:
-                genotypes = row_items[2:]
-            for item_count, genotype in enumerate(genotypes):
-                if genotype.upper().strip() in self.configurations:
-                    this_marker.genotypes.append(
-                        self.configurations[genotype.upper().strip()])
-                else:
-                    this_marker.genotypes.append("NA")
-
-            self.markers.append(this_marker.__dict__)
-
-        self.write_to_bimbam()
-
-    def write_to_bimbam(self):
-        with open(self.output_files[0], "w") as geno_fh:
-            for marker in self.markers:
-                geno_fh.write(marker['name'])
-                geno_fh.write(", X, Y")
-                geno_fh.write(", " + ", ".join(marker['genotypes']))
-                geno_fh.write("\n")
-
-        with open(self.output_files[1], "w") as pheno_fh:
-            for sample in self.sample_list:
-                pheno_fh.write("1\n")
-
-        with open(self.output_files[2], "w") as snp_fh:
-            for marker in self.markers:
-                if self.mb_exists:
-                    snp_fh.write(
-                        marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
-                else:
-                    snp_fh.write(
-                        marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
-
-    def get_sample_list(self, row_contents):
-        self.sample_list = []
-        if self.mb_exists:
-            if self.cm_exists:
-                self.sample_list = row_contents[4:]
-            else:
-                self.sample_list = row_contents[3:]
-        else:
-            if self.cm_exists:
-                self.sample_list = row_contents[3:]
-            else:
-                self.sample_list = row_contents[2:]
-
-    def process_rows(self):
-        for self.latest_row_pos, row in enumerate(self.input_fh):
-            self.latest_row_value = row
-            # Take care of headers
-            if not row.strip():
-                continue
-            if row.startswith('#'):
-                continue
-            if row.startswith('Chr'):
-                if 'Mb' in row.split():
-                    self.mb_exists = True
-                if 'cM' in row.split():
-                    self.cm_exists = True
-                self.get_sample_list(row.split())
-                continue
-            if row.startswith('@'):
-                key, _separater, value = row.partition(':')
-                key = key.strip()
-                value = value.strip()
-                if key == "@filler":
-                    raise EmptyConfigurations
-                if key in self.haplotype_notation:
-                    self.configurations[value] = self.haplotype_notation[key]
-                continue
-            if not len(self.configurations):
-                raise EmptyConfigurations
-            yield row
-
-    @classmethod
-    def process_all(cls, old_directory, new_directory):
-        os.chdir(old_directory)
-        for input_file in glob.glob("*"):
-            if not input_file.endswith(('geno', '.geno.gz')):
-                continue
-            group_name = ".".join(input_file.split('.')[:-1])
-            if group_name == "HSNIH-Palmer":
-                continue
-            geno_output_file = os.path.join(
-                new_directory, group_name + "_geno.txt")
-            pheno_output_file = os.path.join(
-                new_directory, group_name + "_pheno.txt")
-            snp_output_file = os.path.join(
-                new_directory, group_name + "_snps.txt")
-            output_files = [geno_output_file,
-                            pheno_output_file, snp_output_file]
-            print("%s -> %s" % (
-                os.path.join(old_directory, input_file), geno_output_file))
-            convertob = ConvertGenoFile(input_file, output_files)
-            try:
-                convertob.convert()
-            except EmptyConfigurations as why:
-                print("  No config info? Continuing...")
-                continue
-            except Exception as why:
-                print("  Exception:", why)
-                print(traceback.print_exc())
-                print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                    convertob.latest_col_pos))
-                print("    Column is:", convertob.latest_col_value)
-                print("    Row is:", convertob.latest_row_value)
-                break
-
-
-if __name__ == "__main__":
-    Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
-    New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
-    #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
-    #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
-    #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
-    # convertob.convert()
-    ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
-    # ConvertGenoFiles(Geno_Directory)
diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py
deleted file mode 100644
index b755c648..00000000
--- a/wqflask/maintenance/gen_ind_genofiles.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/env python3
-"""A script that generates the genotype files for groups of individuals, using an existing strain genotype file as a basis
-
-Example commands:
-python3 gen_ind_genofiles.py
-        /home/zas1024/gn2-zach/genotype_files/genotype/
-        /home/zas1024/gn2-zach/new_geno/
-        BXD-Micturition.geno
-        BXD.json
-python3 gen_ind_genofiles.py
-        /home/zas1024/gn2-zach/genotype_files/genotype
-        /home/zas1024/gn2-zach/new_geno/
-        BXD-Micturition.geno
-        BXD.2.geno BXD.4.geno BXD.5.geno
-
-"""
-
-import json
-import os
-import sys
-from typing import List
-
-import MySQLdb
-
-def conn():
-    return MySQLdb.Connect(db=os.environ.get("DB_NAME"),
-                           user=os.environ.get("DB_USER"),
-                           passwd=os.environ.get("DB_PASS"),
-                           host=os.environ.get("DB_HOST"))
-
-def main(args):
-
-    # Directory in which .geno files are located
-    geno_dir = args[1]
-
-    # Directory in which to output new files
-    out_dir = args[2]
-
-    # The individuals group that we want to generate a .geno file for
-    target_file = geno_dir + args[3]
-
-    # The source group(s) we're generating the .geno files from
-    # This can be passed as either a specific .geno file (or set of files as multiple arguments),
-    # or as a JSON file containing a set of .geno files (and their corresponding file names and sample lists)
-    geno_json = {}
-    source_files = []
-    if ".json" in args[4]:
-        geno_json = json.load(open(geno_dir + args[4], "r"))
-        par_f1s = {
-            "mat": geno_json['mat'],
-            "pat": geno_json['pat'],
-            "f1s": geno_json['f1s']
-        }
-
-        # List of file titles and locations from JSON
-        source_files = [{'title': genofile['title'], 'location': geno_dir + genofile['location']} for genofile in geno_json['genofile']]
-    else:
-        par_f1s = {}
-        # List of files directly taken from command line arguments, with titles just set to the filename
-        for group in args[4:]:
-            file_name = geno_dir + group + ".geno" if ".geno" not in group else geno_dir + group
-            source_files.append({'title': file_name[:-5], 'location': file_name})
-
-    if len(source_files) > 1:
-        # Generate a JSON file pointing to the new target genotype files, in situations where there are multiple source .geno files
-        target_json_loc = out_dir + ".".join(args[3].split(".")[:-1]) + ".json"
-        target_json = {'genofile': []}
-
-        # Generate the output .geno files
-        for source_file in source_files:
-            filename, samples = generate_new_genofile(source_file['location'], target_file, par_f1s, out_dir)
-
-            target_json['genofile'].append({
-                'location': filename.split("/")[-1],
-                'title': source_file['title'],
-                'sample_list': samples
-            })
-
-        json.dump(target_json, open(target_json_loc, "w"))
-    else:
-        filename, samples = generate_new_genofile(source_files[0]['location'], target_file, par_f1s, out_dir)
-
-def get_strain_for_sample(sample):
-    query = (
-        "SELECT CaseAttributeXRefNew.Value "
-        "FROM CaseAttributeXRefNew, Strain "
-        "WHERE CaseAttributeXRefNew.CaseAttributeId=11 "
-        "AND CaseAttributeXRefNew.StrainId = Strain.Id "
-        "AND Strain.Name = %(name)s" )
-
-    with conn().cursor() as cursor:
-        cursor.execute(query, {"name": sample.strip()})
-        strain = cursor.fetchone()[0]
-        return strain
-
-def generate_new_genofile(source_genofile, target_genofile, par_f1s, out_dir):
-    source_samples = group_samples(source_genofile)
-    source_genotypes = strain_genotypes(source_genofile)
-    target_samples = group_samples(target_genofile)
-    strain_pos_map = map_strain_pos_to_target_group(source_samples, target_samples, par_f1s)
-
-    if len(source_genofile.split("/")[-1].split(".")) > 2:
-        # The number in the source genofile; for example 4 in BXD.4.geno
-        source_num = source_genofile.split("/")[-1].split(".")[-2]
-        target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + "." + source_num + ".geno"
-    else:
-        target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + ".geno"
-
-    file_location = out_dir + target_filename
-
-    with open(file_location, "w") as fh:
-        for metadata in ["name", "type", "mat", "pat", "het", "unk"]:
-            fh.write("@" + metadata + ":" + source_genotypes[metadata] + "\n")
-
-        header_line = ["Chr", "Locus", "cM", "Mb"] + target_samples
-        fh.write("\t".join(header_line) + "\n")
-
-        for marker in source_genotypes['markers']:
-            line_items = [
-                marker['Chr'],
-                marker['Locus'],
-                marker['cM'],
-                marker['Mb']
-            ]
-
-            for pos in strain_pos_map:
-                if isinstance(pos, int):
-                    line_items.append(marker['genotypes'][pos])
-                else:
-                    if pos in ["mat", "pat"]:
-                        line_items.append(source_genotypes[pos])
-                    elif pos == "f1s":
-                        line_items.append("H")
-                    else:
-                        line_items.append("U")
-
-            fh.write("\t".join(line_items) + "\n")
-
-    return file_location, target_samples
-
-def map_strain_pos_to_target_group(source_samples, target_samples, par_f1s):
-    """
-    Retrieve corresponding strain position for each sample in the target group
-
-    This is so the genotypes from the base genofile can be mapped to the samples in the target group
-
-    For example:
-    Base strains: BXD1, BXD2, BXD3
-    Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3
-    Returns: [0, 0, 1, 2, 2, 2]
-    """
-    pos_map = []
-    for sample in target_samples:
-        sample_strain = get_strain_for_sample(sample)
-        if sample_strain in source_samples:
-            pos_map.append(source_samples.index(sample_strain))
-        else:
-            val = "U"
-            for key in par_f1s.keys():
-                if sample_strain in par_f1s[key]:
-                    val = key
-            pos_map.append(val)
-
-    return pos_map
-
-def group_samples(target_file: str) -> List:
-    """
-    Get the group samples from its "dummy" .geno file (which still contains the sample list)
-    """
-
-    sample_list = []
-    with open(target_file, "r") as target_geno:
-        for i, line in enumerate(target_geno):
-            # Skip header lines
-            if line[0] in ["#", "@"] or not len(line):
-                continue
-    
-            line_items = line.split()
-
-            sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]]
-            break
-
-    return sample_list
-
-def strain_genotypes(strain_genofile: str) -> List:
-    """
-    Read genotypes from source strain .geno file
-
-    :param strain_genofile: string of genofile filename
-    :return: a list of dictionaries representing each marker's genotypes
-
-    Example output: [
-        {
-            'Chr': '1',
-            'Locus': 'marker1',
-            'Mb': '10.0',
-            'cM': '8.0',
-            'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...]
-        },
-        ...
-    ]
-    """
-
-    geno_dict = {}
-
-    geno_start_col = None
-    header_columns = []
-    sample_list = []
-    markers = []
-    with open(strain_genofile, "r") as source_geno:
-        for i, line in enumerate(source_geno):
-            if line[0] == "@":
-                metadata_type = line[1:].split(":")[0]
-                if metadata_type in ['name', 'type', 'mat', 'pat', 'het', 'unk']:
-                    geno_dict[metadata_type] = line.split(":")[1].strip()
-
-                continue
-
-            # Skip other header lines
-            if line[0] == "#" or not len(line):
-                continue
-
-            line_items = line.split("\t")
-            if "Chr" in line_items: # Header row
-                # Get the first column index containing genotypes
-                header_columns = line_items
-                for j, item in enumerate(line_items):
-                    if item not in ["Chr", "Locus", "Mb", "cM"]:
-                        geno_start_col = j
-                        break
-
-                sample_list = line_items[geno_start_col:]
-                if not geno_start_col:
-                    print("Check .geno file - expected columns not found")
-                    sys.exit()
-            else: # Marker rows
-                this_marker = {
-                    'Chr': line_items[header_columns.index("Chr")],
-                    'Locus': line_items[header_columns.index("Locus")],
-                    'Mb': line_items[header_columns.index("Mb")],
-                    'cM': line_items[header_columns.index("cM")],
-                    'genotypes': [item.strip() for item in line_items][geno_start_col:]
-                }
-
-                markers.append(this_marker)
-
-    geno_dict['markers'] = markers
-
-    return geno_dict
-            
-if __name__ == "__main__":
-    main(sys.argv)
-
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
deleted file mode 100644
index 01b2fc15..00000000
--- a/wqflask/maintenance/gen_select_dataset.py
+++ /dev/null
@@ -1,296 +0,0 @@
-"""Script that generates the data for the main dropdown menus on the home page
-
-Writes out data as /static/new/javascript/dataset_menu_structure.json
-It needs to be run manually when database has been changed. Run it as
-
-  ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
-
-"""
-
-
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams
-# at rwilliams@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-
-import sys
-
-# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
-sys.path.insert(0, './')
-# NEW: import app to avoid a circular dependency on utility.tools
-from wqflask import app
-
-from utility.tools import get_setting
-
-import simplejson as json
-import urllib.parse
-
-
-from pprint import pformat as pf
-
-from wqflask.database import database_connection
-
-
-def get_species(cursor):
-    """Build species list"""
-    #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
-    cursor.execute("select Name, MenuName from Species order by OrderId")
-    species = list(cursor.fetchall())
-    return species
-
-
-def get_groups(cursor, species):
-    """Build groups list"""
-    groups = {}
-    for species_name, _species_full_name in species:
-        cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet,
-                       Species,
-                       ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s'
-                       and InbredSet.SpeciesId = Species.Id and
-                       (PublishFreeze.InbredSetId = InbredSet.Id
-                        or GenoFreeze.InbredSetId = InbredSet.Id
-                        or ProbeFreeze.InbredSetId = InbredSet.Id)
-                        group by InbredSet.Name
-                        order by InbredSet.FullName""" % species_name)
-        results = cursor.fetchall()
-        groups[species_name] = list(results)
-    return groups
-
-
-def get_types(groups):
-    """Build types list"""
-    types = {}
-    #print("Groups: ", pf(groups))
-    for species, group_dict in list(groups.items()):
-        types[species] = {}
-        for group_name, _group_full_name in group_dict:
-            # make group an alias to shorten the code
-            #types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")]
-            if phenotypes_exist(group_name):
-                types[species][group_name] = [("Phenotypes", "Phenotypes")]
-            if genotypes_exist(group_name):
-                if group_name in types[species]:
-                    types[species][group_name] += [("Genotypes", "Genotypes")]
-                else:
-                    types[species][group_name] = [("Genotypes", "Genotypes")]
-            if group_name in types[species]:
-                types_list = build_types(species, group_name)
-                if len(types_list) > 0:
-                    types[species][group_name] += types_list
-                else:
-                    if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
-                        types[species].pop(group_name, None)
-                        groups[species] = tuple(
-                            group for group in groups[species] if group[0] != group_name)
-            else:  # ZS: This whole else statement might be unnecessary, need to check
-                types_list = build_types(species, group_name)
-                if len(types_list) > 0:
-                    types[species][group_name] = types_list
-                else:
-                    types[species].pop(group_name, None)
-                    groups[species] = tuple(
-                        group for group in groups[species] if group[0] != group_name)
-    return types
-
-
-def phenotypes_exist(group_name):
-    #print("group_name:", group_name)
-    Cursor.execute("""select Name from PublishFreeze
-                      where PublishFreeze.Name = '%s'""" % (group_name + "Publish"))
-
-    results = Cursor.fetchone()
-    #print("RESULTS:", results)
-
-    if results != None:
-        return True
-    else:
-        return False
-
-
-def genotypes_exist(group_name):
-    #print("group_name:", group_name)
-    Cursor.execute("""select Name from GenoFreeze
-                      where GenoFreeze.Name = '%s'""" % (group_name + "Geno"))
-
-    results = Cursor.fetchone()
-    #print("RESULTS:", results)
-
-    if results != None:
-        return True
-    else:
-        return False
-
-
-def build_types(species, group):
-    """Fetches tissues
-
-    Gets the tissues with data for this species/group
-    (all types except phenotype/genotype are tissues)
-
-    """
-
-    Cursor.execute("""select distinct Tissue.Name
-                       from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
-                       where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
-                       InbredSet.Name = '%s' and
-                       ProbeFreeze.TissueId = Tissue.Id and
-                       ProbeFreeze.InbredSetId = InbredSet.Id and
-                       ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
-                       ProbeSetFreeze.public > 0 and
-                       ProbeSetFreeze.confidentiality < 1
-                       order by Tissue.Name""" % (species, group))
-
-    results = []
-    for result in Cursor.fetchall():
-        if len(result):
-            these_datasets = build_datasets(species, group, result[0])
-            if len(these_datasets) > 0:
-                results.append((result[0], result[0]))
-
-    return results
-
-
-def get_datasets(types):
-    """Build datasets list"""
-    datasets = {}
-    for species, group_dict in list(types.items()):
-        datasets[species] = {}
-        for group, type_list in list(group_dict.items()):
-            datasets[species][group] = {}
-            for type_name in type_list:
-                these_datasets = build_datasets(species, group, type_name[0])
-                if len(these_datasets) > 0:
-                    datasets[species][group][type_name[0]] = these_datasets
-
-    return datasets
-
-
-def build_datasets(species, group, type_name):
-    """Gets dataset names from database"""
-    dataset_text = dataset_value = None
-    datasets = []
-    if type_name == "Phenotypes":
-        Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where
-                    InbredSet.Name = '%s' and
-                    PublishFreeze.InbredSetId = InbredSet.Id and
-                    InfoFiles.InfoPageName = PublishFreeze.Name order by
-                    PublishFreeze.CreateTime asc""" % group)
-
-        results = Cursor.fetchall()
-        if len(results) > 0:
-            for result in results:
-                print(result)
-                dataset_id = str(result[0])
-                dataset_value = str(result[1])
-                if group == 'MDP':
-                    dataset_text = "Mouse Phenome Database"
-                else:
-                    #dataset_text = "%s Phenotypes" % group
-                    dataset_text = str(result[2])
-                datasets.append((dataset_id, dataset_value, dataset_text))
-        else:
-            dataset_id = "None"
-            dataset_value = "%sPublish" % group
-            dataset_text = "%s Phenotypes" % group
-            datasets.append((dataset_id, dataset_value, dataset_text))
-
-    elif type_name == "Genotypes":
-        Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
-                    InbredSet.Name = '%s' and
-                    GenoFreeze.InbredSetId = InbredSet.Id and
-                    InfoFiles.InfoPageName = GenoFreeze.ShortName and
-                    GenoFreeze.public > 0 and
-                    GenoFreeze.confidentiality < 1 order by
-                    GenoFreeze.CreateTime desc""" % group)
-
-        results = Cursor.fetchone()
-        if results != None:
-            dataset_id = str(results[0])
-        else:
-            dataset_id = "None"
-        dataset_value = "%sGeno" % group
-        dataset_text = "%s Genotypes" % group
-        datasets.append((dataset_id, dataset_value, dataset_text))
-
-    else:  # for mRNA expression/ProbeSet
-        Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
-                    ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
-                    Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
-                    InbredSet.Name = '%s' and
-                    ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and
-                    ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and
-                    ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by
-                    ProbeSetFreeze.CreateTime desc""" % (species, group, type_name))
-
-        dataset_results = Cursor.fetchall()
-        datasets = []
-        for dataset_info in dataset_results:
-            this_dataset_info = []
-            for info in dataset_info:
-                this_dataset_info.append(str(info))
-            datasets.append(this_dataset_info)
-
-    return datasets
-
-
-def main(cursor):
-    """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
-
-    species = get_species(cursor)
-    groups = get_groups(cursor, species)
-    types = get_types(groups)
-    datasets = get_datasets(types)
-
-    #species.append(('All Species', 'All Species'))
-    #groups['All Species'] = [('All Groups', 'All Groups')]
-    #types['All Species'] = {}
-    #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')]
-    #datasets['All Species'] = {}
-    #datasets['All Species']['All Groups'] = {}
-    #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')]
-
-    data = dict(species=species,
-                groups=groups,
-                types=types,
-                datasets=datasets,
-                )
-
-    #print("data:", data)
-
-    output_file = """./wqflask/static/new/javascript/dataset_menu_structure.json"""
-
-    with open(output_file, 'w') as fh:
-        json.dump(data, fh, indent="   ", sort_keys=True)
-
-    #print("\nWrote file to:", output_file)
-
-
-def _test_it():
-    """Used for internal testing only"""
-    types = build_types("Mouse", "BXD")
-    #print("build_types:", pf(types))
-    datasets = build_datasets("Mouse", "BXD", "Hippocampus")
-    #print("build_datasets:", pf(datasets))
-
-
-if __name__ == '__main__':
-    with database_connection(get_setting("SQL_URI")) as conn:
-        with conn.cursor() as cursor:
-            main(cursor)
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
deleted file mode 100644
index 9f01d094..00000000
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/python
-
-"""
-Generate relatedness matrix files for GEMMA from BIMBAM genotype/phenotype files
-
-This file goes through all of the BIMBAM files in the bimbam diretory
-and uses GEMMA to generate their corresponding kinship/relatedness matrix file
-
-"""
-
-import sys
-sys.path.append("..")
-import os
-import glob
-
-
-class GenerateKinshipMatrices:
-    def __init__(self, group_name, geno_file, pheno_file):
-        self.group_name = group_name
-        self.geno_file = geno_file
-        self.pheno_file = pheno_file
-
-    def generate_kinship(self):
-        gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \
-            " -p " + self.pheno_file + \
-            " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
-        print("command:", gemma_command)
-        os.system(gemma_command)
-
-    @classmethod
-    def process_all(self, geno_dir, bimbam_dir):
-        os.chdir(geno_dir)
-        for input_file in glob.glob("*"):
-            if not input_file.endswith(('geno', '.geno.gz')):
-                continue
-            group_name = ".".join(input_file.split('.')[:-1])
-            if group_name == "HSNIH-Palmer":
-                continue
-            geno_input_file = os.path.join(
-                bimbam_dir, group_name + "_geno.txt")
-            pheno_input_file = os.path.join(
-                bimbam_dir, group_name + "_pheno.txt")
-            convertob = GenerateKinshipMatrices(
-                group_name, geno_input_file, pheno_input_file)
-            try:
-                convertob.generate_kinship()
-            except EmptyConfigurations as why:
-                print("  No config info? Continuing...")
-                continue
-            except Exception as why:
-
-                print("  Exception:", why)
-                print(traceback.print_exc())
-                print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                    convertob.latest_col_pos))
-                print("    Column is:", convertob.latest_col_value)
-                print("    Row is:", convertob.latest_row_value)
-                break
-
-
-if __name__ == "__main__":
-    Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/"""
-    Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/"""
-    GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
-
-    # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py
deleted file mode 100644
index 2f917c71..00000000
--- a/wqflask/maintenance/generate_probesetfreeze_file.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/python
-
-import sys
-
-# sys.path.insert(0, "..") - why?
-
-import os
-import collections
-import csv
-
-from base import webqtlConfig
-
-from pprint import pformat as pf
-
-from utility.tools import get_setting
-from wqflask.database import database_connection
-
-
-def show_progress(process, counter):
-    if counter % 1000 == 0:
-        print("{}: {}".format(process, counter))
-
-
-def get_strains(cursor):
-    cursor.execute("""select Strain.Name
-                      from Strain, StrainXRef, InbredSet
-                      where Strain.Id = StrainXRef.StrainId and
-                            StrainXRef.InbredSetId = InbredSet.Id
-                            and InbredSet.Name=%s;
-                """, "BXD")
-
-    strains = [strain[0] for strain in cursor.fetchall()]
-    print("strains:", pf(strains))
-    for strain in strains:
-        print(" -", strain)
-
-    return strains
-
-
-def get_probeset_vals(cursor, dataset_name):
-    cursor.execute(""" select ProbeSet.Id, ProbeSet.Name
-                from ProbeSetXRef,
-                     ProbeSetFreeze,
-                     ProbeSet
-                where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and
-                      ProbeSetFreeze.Name = %s and
-                      ProbeSetXRef.ProbeSetId = ProbeSet.Id;
-            """, dataset_name)
-
-    probesets = cursor.fetchall()
-
-    print("Fetched probesets")
-
-    probeset_vals = collections.OrderedDict()
-
-    for counter, probeset in enumerate(probesets):
-        cursor.execute(""" select Strain.Name, ProbeSetData.value
-                       from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain
-                       where ProbeSetData.Id = ProbeSetXRef.DataId
-                       and ProbeSetData.StrainId = Strain.Id
-                       and ProbeSetXRef.ProbeSetId = %s
-                       and ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId
-                       and ProbeSetFreeze.Name = %s;
-                """, (probeset[0], dataset_name))
-        val_dic = collections.OrderedDict()
-        vals = cursor.fetchall()
-        for val in vals:
-            val_dic[val[0]] = val[1]
-
-        probeset_vals[probeset[1]] = val_dic
-        show_progress("Querying DB", counter)
-
-    return probeset_vals
-
-
-def trim_strains(strains, probeset_vals):
-    trimmed_strains = []
-    #print("probeset_vals is:", pf(probeset_vals))
-    first_probeset = list(probeset_vals.values())[0]
-    print("\n**** first_probeset is:", pf(first_probeset))
-    for strain in strains:
-        print("\n**** strain is:", pf(strain))
-        if strain in first_probeset:
-            trimmed_strains.append(strain)
-    print("trimmed_strains:", pf(trimmed_strains))
-    return trimmed_strains
-
-
-def write_data_matrix_file(strains, probeset_vals, filename):
-    with open(filename, "wb") as fh:
-        csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL)
-        #print("strains is:", pf(strains))
-        csv_writer.writerow(['ID'] + strains)
-        for counter, probeset in enumerate(probeset_vals):
-            row_data = [probeset]
-            for strain in strains:
-                #print("probeset is: ", pf(probeset_vals[probeset]))
-                row_data.append(probeset_vals[probeset][strain])
-            #print("row_data is: ", pf(row_data))
-            csv_writer.writerow(row_data)
-            show_progress("Writing", counter)
-
-
-def main():
-    filename = os.path.expanduser(
-        "~/gene/wqflask/maintenance/"
-        "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2"
-        "(Oct08)_RankInv_Beta.txt")
-    dataset_name = "Eye_AXBXA_1008_RankInv"
-
-    with database_connection(get_setting("SQL_URI")) as conn:
-        with conn.cursor() as cursor:
-            strains = get_strains(cursor)
-            print("Getting probset_vals")
-            probeset_vals = get_probeset_vals(cursor, dataset_name)
-            print("Finished getting probeset_vals")
-            trimmed_strains = trim_strains(strains, probeset_vals)
-            write_data_matrix_file(trimmed_strains, probeset_vals, filename)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
deleted file mode 100644
index 32e0e34b..00000000
--- a/wqflask/maintenance/geno_to_json.py
+++ /dev/null
@@ -1,196 +0,0 @@
-#!/usr/bin/python
-
-"""
-Convert .geno files to json
-
-This file goes through all of the genofiles in the genofile directory (.geno)
-and converts them to json files that are used when running the marker regression
-code
-
-"""
-
-import sys
-sys.path.append("..")
-import os
-import glob
-import traceback
-import gzip
-
-#import numpy as np
-#from pyLMM import lmm
-
-import simplejson as json
-
-from pprint import pformat as pf
-
-#from utility.tools import flat_files
-
-
-class EmptyConfigurations(Exception):
-    pass
-
-
-class Marker:
-    def __init__(self):
-        self.name = None
-        self.chr = None
-        self.cM = None
-        self.Mb = None
-        self.genotypes = []
-
-
-class ConvertGenoFile:
-
-    def __init__(self, input_file, output_file):
-
-        self.input_file = input_file
-        self.output_file = output_file
-
-        self.mb_exists = False
-        self.cm_exists = False
-        self.markers = []
-
-        self.latest_row_pos = None
-        self.latest_col_pos = None
-
-        self.latest_row_value = None
-        self.latest_col_value = None
-
-    def convert(self):
-
-        self.haplotype_notation = {
-            '@mat': "1",
-            '@pat': "0",
-            '@het': "0.5",
-            '@unk': "NA"
-        }
-
-        self.configurations = {}
-        #self.skipped_cols = 3
-
-        # if self.input_file.endswith(".geno.gz"):
-        #    print("self.input_file: ", self.input_file)
-        #    self.input_fh = gzip.open(self.input_file)
-        # else:
-        self.input_fh = open(self.input_file)
-
-        with open(self.output_file, "w") as self.output_fh:
-            # if self.file_type == "geno":
-            self.process_csv()
-            # elif self.file_type == "snps":
-            #    self.process_snps_file()
-
-    def process_csv(self):
-        for row_count, row in enumerate(self.process_rows()):
-            row_items = row.split("\t")
-
-            this_marker = Marker()
-            this_marker.name = row_items[1]
-            this_marker.chr = row_items[0]
-            if self.cm_exists and self.mb_exists:
-                this_marker.cM = row_items[2]
-                this_marker.Mb = row_items[3]
-                genotypes = row_items[4:]
-            elif self.cm_exists:
-                this_marker.cM = row_items[2]
-                genotypes = row_items[3:]
-            elif self.mb_exists:
-                this_marker.Mb = row_items[2]
-                genotypes = row_items[3:]
-            else:
-                genotypes = row_items[2:]
-            for item_count, genotype in enumerate(genotypes):
-                if genotype.upper() in self.configurations:
-                    this_marker.genotypes.append(
-                        self.configurations[genotype.upper()])
-                else:
-                    this_marker.genotypes.append("NA")
-
-            #print("this_marker is:", pf(this_marker.__dict__))
-            # if this_marker.chr == "14":
-            self.markers.append(this_marker.__dict__)
-
-        with open(self.output_file, 'w') as fh:
-            json.dump(self.markers, fh, indent="   ", sort_keys=True)
-
-            # print('configurations:', str(configurations))
-            #self.latest_col_pos = item_count + self.skipped_cols
-            #self.latest_col_value = item
-
-            # if item_count != 0:
-            #    self.output_fh.write(" ")
-            # self.output_fh.write(self.configurations[item.upper()])
-
-            # self.output_fh.write("\n")
-
-    def process_rows(self):
-        for self.latest_row_pos, row in enumerate(self.input_fh):
-            # if self.input_file.endswith(".geno.gz"):
-            #    print("row: ", row)
-            self.latest_row_value = row
-            # Take care of headers
-            if not row.strip():
-                continue
-            if row.startswith('#'):
-                continue
-            if row.startswith('Chr'):
-                if 'Mb' in row.split():
-                    self.mb_exists = True
-                if 'cM' in row.split():
-                    self.cm_exists = True
-                continue
-            if row.startswith('@'):
-                key, _separater, value = row.partition(':')
-                key = key.strip()
-                value = value.strip()
-                if key in self.haplotype_notation:
-                    self.configurations[value] = self.haplotype_notation[key]
-                continue
-            if not len(self.configurations):
-                raise EmptyConfigurations
-            yield row
-
-    @classmethod
-    def process_all(cls, old_directory, new_directory):
-        os.chdir(old_directory)
-        for input_file in glob.glob("*"):
-            if not input_file.endswith(('geno', '.geno.gz')):
-                continue
-            group_name = ".".join(input_file.split('.')[:-1])
-            output_file = os.path.join(new_directory, group_name + ".json")
-            print("%s -> %s" % (
-                os.path.join(old_directory, input_file), output_file))
-            convertob = ConvertGenoFile(input_file, output_file)
-            try:
-                convertob.convert()
-            except EmptyConfigurations as why:
-                print("  No config info? Continuing...")
-                #excepted = True
-                continue
-            except Exception as why:
-
-                print("  Exception:", why)
-                print(traceback.print_exc())
-                print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                    convertob.latest_col_pos))
-                print("    Column is:", convertob.latest_col_value)
-                print("    Row is:", convertob.latest_row_value)
-                break
-
-    # def process_snps_file(cls, snps_file, new_directory):
-    #    output_file = os.path.join(new_directory, "mouse_families.json")
-    #    print("%s -> %s" % (snps_file, output_file))
-    #    convertob = ConvertGenoFile(input_file, output_file)
-
-
-if __name__ == "__main__":
-    Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
-    New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
-    #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
-    #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
-    #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
-    # convertob.convert()
-    ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
-    # ConvertGenoFiles(Geno_Directory)
-
-    #process_csv(Input_File, Output_File)
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
deleted file mode 100644
index 0a450d3f..00000000
--- a/wqflask/maintenance/get_group_samplelists.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import os
-import glob
-import gzip
-
-from base import webqtlConfig
-
-
-def get_samplelist(file_type, geno_file):
-    if file_type == "geno":
-        return get_samplelist_from_geno(geno_file)
-    elif file_type == "plink":
-        return get_samplelist_from_plink(geno_file)
-
-
-def get_samplelist_from_geno(genofilename):
-    if os.path.isfile(genofilename + '.gz'):
-        genofilename += '.gz'
-        genofile = gzip.open(genofilename)
-    else:
-        genofile = open(genofilename)
-
-    for line in genofile:
-        line = line.strip()
-        if not line:
-            continue
-        if line.startswith(("#", "@")):
-            continue
-        break
-
-    headers = line.split("\t")
-
-    if headers[3] == "Mb":
-        samplelist = headers[4:]
-    else:
-        samplelist = headers[3:]
-    return samplelist
-
-
-def get_samplelist_from_plink(genofilename):
-    genofile = open(genofilename)
-
-    samplelist = []
-    for line in genofile:
-        line = line.split(" ")
-        samplelist.append(line[1])
-
-    return samplelist
diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py
deleted file mode 100644
index 9d12da8a..00000000
--- a/wqflask/maintenance/print_benchmark.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/python
-
-import time
-
-from pprint import pformat as pf
-
-
-class TheCounter:
-    Counters = {}
-
-    def __init__(self):
-        start_time = time.time()
-        for counter in range(170000):
-            self.print_it(counter)
-        self.time_took = time.time() - start_time
-        TheCounter.Counters[self.__class__.__name__] = self.time_took
-
-
-class PrintAll(TheCounter):
-    def print_it(self, counter):
-        print(counter)
-
-
-class PrintSome(TheCounter):
-    def print_it(self, counter):
-        if counter % 1000 == 0:
-            print(counter)
-
-
-class PrintNone(TheCounter):
-    def print_it(self, counter):
-        pass
-
-
-def new_main():
-    print("Running new_main")
-    tests = [PrintAll, PrintSome, PrintNone]
-    for test in tests:
-        test()
-
-    print(pf(TheCounter.Counters))
-
-
-if __name__ == '__main__':
-    new_main()
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
deleted file mode 100644
index 36049a82..00000000
--- a/wqflask/maintenance/quantile_normalize.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import sys
-sys.path.insert(0, './')
-import urllib.parse
-
-import numpy as np
-import pandas as pd
-
-from flask import Flask, g, request
-
-from wqflask import app
-from wqflask.database import database_connection
-from utility.tools import get_setting
-
-
-def create_dataframe(input_file):
-    with open(input_file) as f:
-        ncols = len(f.readline().split("\t"))
-
-    input_array = np.loadtxt(open(
-        input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols)))
-    return pd.DataFrame(input_array)
-
-# This function taken from https://github.com/ShawnLYU/Quantile_Normalize
-
-
-def quantileNormalize(df_input):
-    df = df_input.copy()
-    # compute rank
-    dic = {}
-    for col in df:
-        dic.update({col: sorted(df[col])})
-    sorted_df = pd.DataFrame(dic)
-    rank = sorted_df.mean(axis=1).tolist()
-    # sort
-    for col in df:
-        t = np.searchsorted(np.sort(df[col]), df[col])
-        df[col] = [rank[i] for i in t]
-    return df
-
-
-def set_data(cursor, dataset_name):
-    orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
-
-    sample_list = []
-    with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh:
-        for i, (line1, line2) in enumerate(zip(orig_fh, quant_fh)):
-            trait_dict = {}
-            sample_list = []
-            if i == 0:
-                sample_names = line1.split('\t')[1:]
-            else:
-                trait_name = line1.split('\t')[0]
-                for i, sample in enumerate(sample_names):
-                    this_sample = {
-                        "name": sample,
-                        "value": line1.split('\t')[i + 1],
-                        "qnorm": line2.split('\t')[i + 1]
-                    }
-                    sample_list.append(this_sample)
-                query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName
-                           FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet
-                           WHERE Species.Id = InbredSet.SpeciesId and
-                                 InbredSet.Id = ProbeFreeze.InbredSetId and
-                                 ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and
-                                 ProbeSetFreeze.Name = '%s' and
-                                 ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
-                                 ProbeSetXRef.ProbeSetId = ProbeSet.Id and
-                                 ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0])
-                cursor.execute(query)
-                result_info = cursor.fetchone()
-
-                yield {
-                    "_index": "traits",
-                    "_type": "trait",
-                    "_source": {
-                        "name": trait_name,
-                        "species": result_info[0],
-                        "group": result_info[1],
-                        "dataset": dataset_name,
-                        "dataset_fullname": result_info[2],
-                        "samples": sample_list,
-                        "transform_types": "qnorm"
-                    }
-                }
-
-
-if __name__ == '__main__':
-    with database_connection(get_setting("SQL_URI")) as conn:
-        with conn.cursor() as cursor:
-            success, _ = bulk(es, set_data(cursor, sys.argv[1]))
-
-            response = es.search(
-                index="traits", doc_type="trait", body={
-                    "query": {"match": {"name": "ENSMUSG00000028982"}}
-                }
-            )
-
-            print(response)
diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py
deleted file mode 100644
index cebe33c0..00000000
--- a/wqflask/maintenance/set_resource_defaults.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-
-Script that sets default resource access masks for use with the DB proxy
-
-Defaults will be:
-Owner - omni_gn
-Mask  - Public/non-confidential: { data: "view",
-                                   metadata: "view",
-                                   admin: "not-admin" }
-        Private/confidentia:     { data: "no-access",
-                                   metadata: "no-access",
-                                   admin: "not-admin" }
-
-To run:
-./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
-
-"""
-
-import sys
-import json
-
-# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
-sys.path.insert(0, './')
-
-# NEW: import app to avoid a circular dependency on utility.tools
-from wqflask import app
-
-from utility import hmac
-from utility.tools import get_setting
-from utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info
-Redis = get_redis_conn()
-
-import urllib.parse
-
-from wqflask.database import database_connection
-
-
-def insert_probeset_resources(cursor, default_owner_id):
-    current_resources = Redis.hgetall("resources")
-    cursor.execute("""  SELECT
-                            ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public
-                        FROM
-                            ProbeSetFreeze""")
-
-    resource_results = cursor.fetchall()
-    for i, resource in enumerate(resource_results):
-        resource_ob = {}
-        resource_ob['name'] = resource[1]
-        resource_ob['owner_id'] = default_owner_id
-        resource_ob['data'] = {"dataset": str(resource[0])}
-        resource_ob['type'] = "dataset-probeset"
-        if resource[2] < 1 and resource[3] > 0:
-            resource_ob['default_mask'] = {"data": "view",
-                                           "metadata": "view",
-                                           "admin": "not-admin"}
-        else:
-            resource_ob['default_mask'] = {"data": "no-access",
-                                           "metadata": "no-access",
-                                           "admin": "not-admin"}
-        resource_ob['group_masks'] = {}
-
-        add_resource(resource_ob, update=False)
-
-
-def insert_publish_resources(cursor, default_owner_id):
-    current_resources = Redis.hgetall("resources")
-    cursor.execute("""  SELECT 
-                            PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode
-                        FROM
-                            PublishXRef, PublishFreeze, InbredSet, Publication
-                        WHERE
-                            PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
-                            InbredSet.Id = PublishXRef.InbredSetId AND
-                            Publication.Id = PublishXRef.PublicationId""")
-
-    resource_results = cursor.fetchall()
-    for resource in resource_results:
-        if resource[2]:
-            resource_ob = {}
-            if resource[2]:
-                resource_ob['name'] = resource[2] + "_" + str(resource[0])
-            else:
-                resource_ob['name'] = str(resource[0])
-            resource_ob['owner_id'] = default_owner_id
-            resource_ob['data'] = {"dataset": str(resource[1]),
-                                   "trait": str(resource[0])}
-            resource_ob['type'] = "dataset-publish"
-            resource_ob['default_mask'] = {"data": "view",
-                                           "metadata": "view",
-                                           "admin": "not-admin"}
-
-            resource_ob['group_masks'] = {}
-
-            add_resource(resource_ob, update=False)
-        else:
-            continue
-
-
-def insert_geno_resources(cursor, default_owner_id):
-    current_resources = Redis.hgetall("resources")
-    cursor.execute("""  SELECT
-                            GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality
-                        FROM
-                            GenoFreeze""")
-
-    resource_results = cursor.fetchall()
-    for i, resource in enumerate(resource_results):
-        resource_ob = {}
-        resource_ob['name'] = resource[1]
-        if resource[1] == "HET3-ITPGeno":
-            resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"
-        else:
-            resource_ob['owner_id'] = default_owner_id
-        resource_ob['data'] = {"dataset": str(resource[0])}
-        resource_ob['type'] = "dataset-geno"
-        if resource[2] < 1:
-            resource_ob['default_mask'] = {"data": "view",
-                                           "metadata": "view",
-                                           "admin": "not-admin"}
-        else:
-            resource_ob['default_mask'] = {"data": "no-access",
-                                           "metadata": "no-access",
-                                           "admin": "not-admin"}
-        resource_ob['group_masks'] = {}
-
-        add_resource(resource_ob, update=False)
-
-
-def insert_resources(default_owner_id):
-    current_resources = get_resources()
-    print("START")
-    insert_publish_resources(cursor, default_owner_id)
-    print("AFTER PUBLISH")
-    insert_geno_resources(cursor, default_owner_id)
-    print("AFTER GENO")
-    insert_probeset_resources(cursor, default_owner_id)
-    print("AFTER PROBESET")
-
-
-def main(cursor):
-    """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
-
-    Redis.delete("resources")
-
-    owner_id = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"
-
-    insert_resources(owner_id)
-
-
-if __name__ == '__main__':
-    with database_connection(get_setting("SQL_URI")) as conn:
-        with conn.cursor() as cursor:
-            main(cursor)