diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/convert_dryad_to_bimbam.py | 3 | ||||
-rw-r--r-- | wqflask/maintenance/convert_geno_to_bimbam.py | 4 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 5 | ||||
-rw-r--r-- | wqflask/maintenance/generate_kinship_from_bimbam.py | 1 | ||||
-rw-r--r-- | wqflask/maintenance/generate_probesetfreeze_file.py | 7 | ||||
-rw-r--r-- | wqflask/maintenance/geno_to_json.py | 6 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 3 | ||||
-rw-r--r-- | wqflask/maintenance/print_benchmark.py | 4 | ||||
-rw-r--r-- | wqflask/maintenance/quantile_normalize.py | 6 | ||||
-rw-r--r-- | wqflask/maintenance/set_resource_defaults.py | 7 |
10 files changed, 42 insertions, 4 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py index 8eab66e8..e417c280 100644 --- a/wqflask/maintenance/convert_dryad_to_bimbam.py +++ b/wqflask/maintenance/convert_dryad_to_bimbam.py @@ -55,15 +55,18 @@ def read_dryad_file(filename): # # return geno_rows + def write_bimbam_files(geno_rows): with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: for row in geno_rows: geno_fh.write(", ".join(row) + "\n") + def convert_dryad_to_bimbam(filename): geno_file_rows = read_dryad_file(filename) write_bimbam_files(geno_file_rows) + if __name__ == "__main__": input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" convert_dryad_to_bimbam(input_filename) diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index dc01cbb3..5b2369c9 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -20,8 +20,10 @@ import simplejson as json from pprint import pformat as pf + class EmptyConfigurations(Exception): pass + class Marker: def __init__(self): self.name = None @@ -30,6 +32,7 @@ class Marker: self.Mb = None self.genotypes = [] + class ConvertGenoFile: def __init__(self, input_file, output_files): @@ -178,6 +181,7 @@ class ConvertGenoFile: print(" Row is:", convertob.latest_row_value) break + if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index f480d63f..583a06e1 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -55,6 +55,7 @@ from pprint import pformat as pf #conn = Engine.connect() + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" @@ -143,6 +144,7 @@ def phenotypes_exist(group_name): else: return False + def genotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from GenoFreeze @@ -156,6 +158,7 @@ def genotypes_exist(group_name): else: return False + def build_types(species, group): """Fetches tissues @@ -184,6 +187,7 @@ def build_types(species, group): return results + def get_datasets(types): """Build datasets list""" datasets = {} @@ -308,6 +312,7 @@ def _test_it(): datasets = build_datasets("Mouse", "BXD", "Hippocampus") #print("build_datasets:", pf(datasets)) + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py index 664e9e48..7cc60c9e 100644 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -13,6 +13,7 @@ sys.path.append("..") import os import glob + class GenerateKinshipMatrices: def __init__(self, group_name, geno_file, pheno_file): self.group_name = group_name diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py index b1e41e9a..bd9c2ab4 100644 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -23,10 +23,12 @@ def get_cursor(): cursor = con.cursor() return cursor + def show_progress(process, counter): if counter % 1000 == 0: print("{}: {}".format(process, counter)) + def get_strains(cursor): cursor.execute("""select Strain.Name from Strain, StrainXRef, InbredSet @@ -42,6 +44,7 @@ def get_strains(cursor): return strains + def get_probeset_vals(cursor, dataset_name): cursor.execute(""" select ProbeSet.Id, ProbeSet.Name from ProbeSetXRef, @@ -77,6 +80,7 @@ def get_probeset_vals(cursor, dataset_name): return probeset_vals + def trim_strains(strains, probeset_vals): trimmed_strains = [] #print("probeset_vals is:", pf(probeset_vals)) @@ -89,6 +93,7 @@ def trim_strains(strains, probeset_vals): print("trimmed_strains:", pf(trimmed_strains)) return trimmed_strains + def write_data_matrix_file(strains, probeset_vals, filename): with open(filename, "wb") as fh: csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) @@ -103,6 +108,7 @@ def write_data_matrix_file(strains, probeset_vals, filename): csv_writer.writerow(row_data) show_progress("Writing", counter) + def main(): filename = os.path.expanduser("~/gene/wqflask/maintenance/" + "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + @@ -117,5 +123,6 @@ def main(): trimmed_strains = trim_strains(strains, probeset_vals) write_data_matrix_file(trimmed_strains, probeset_vals, filename) + if __name__ == '__main__': main() diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py index fa0dcebd..ad3f2b72 100644 --- a/wqflask/maintenance/geno_to_json.py +++ b/wqflask/maintenance/geno_to_json.py @@ -25,10 +25,10 @@ from pprint import pformat as pf #from utility.tools import flat_files + class EmptyConfigurations(Exception): pass - class Marker: def __init__(self): self.name = None @@ -37,6 +37,7 @@ class Marker: self.Mb = None self.genotypes = [] + class ConvertGenoFile: def __init__(self, input_file, output_file): @@ -78,7 +79,6 @@ class ConvertGenoFile: # elif self.file_type == "snps": # self.process_snps_file() - def process_csv(self): for row_count, row in enumerate(self.process_rows()): row_items = row.split("\t") @@ -121,7 +121,6 @@ class ConvertGenoFile: # self.output_fh.write("\n") - def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): # if self.input_file.endswith(".geno.gz"): @@ -182,7 +181,6 @@ class ConvertGenoFile: # convertob = ConvertGenoFile(input_file, output_file) - if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 3f9d0278..0a450d3f 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -4,12 +4,14 @@ import gzip from base import webqtlConfig + def get_samplelist(file_type, geno_file): if file_type == "geno": return get_samplelist_from_geno(geno_file) elif file_type == "plink": return get_samplelist_from_plink(geno_file) + def get_samplelist_from_geno(genofilename): if os.path.isfile(genofilename + '.gz'): genofilename += '.gz' @@ -33,6 +35,7 @@ def get_samplelist_from_geno(genofilename): samplelist = headers[3:] return samplelist + def get_samplelist_from_plink(genofilename): genofile = open(genofilename) diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py index a1046c86..9d12da8a 100644 --- a/wqflask/maintenance/print_benchmark.py +++ b/wqflask/maintenance/print_benchmark.py @@ -15,15 +15,18 @@ class TheCounter: self.time_took = time.time() - start_time TheCounter.Counters[self.__class__.__name__] = self.time_took + class PrintAll(TheCounter): def print_it(self, counter): print(counter) + class PrintSome(TheCounter): def print_it(self, counter): if counter % 1000 == 0: print(counter) + class PrintNone(TheCounter): def print_it(self, counter): pass @@ -37,5 +40,6 @@ def new_main(): print(pf(TheCounter.Counters)) + if __name__ == '__main__': new_main() diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 6751a8e5..1896bc52 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -14,6 +14,7 @@ from wqflask import app from utility.elasticsearch_tools import get_elasticsearch_connection from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" @@ -28,6 +29,7 @@ def parse_db_uri(): print(db_conn_info) return db_conn_info + def create_dataframe(input_file): with open(input_file) as f: ncols = len(f.readline().split("\t")) @@ -36,6 +38,8 @@ def create_dataframe(input_file): return pd.DataFrame(input_array) # This function taken from https://github.com/ShawnLYU/Quantile_Normalize + + def quantileNormalize(df_input): df = df_input.copy() # compute rank @@ -50,6 +54,7 @@ def quantileNormalize(df_input): df[col] = [rank[i] for i in t] return df + def set_data(dataset_name): orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" @@ -95,6 +100,7 @@ def set_data(dataset_name): } } + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py index 286094dd..c6c4f44c 100644 --- a/wqflask/maintenance/set_resource_defaults.py +++ b/wqflask/maintenance/set_resource_defaults.py @@ -37,6 +37,7 @@ import urllib.parse from utility.logger import getLogger logger = getLogger(__name__) + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" @@ -51,6 +52,7 @@ def parse_db_uri(): print(db_conn_info) return db_conn_info + def insert_probeset_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -77,6 +79,7 @@ def insert_probeset_resources(default_owner_id): add_resource(resource_ob, update=False) + def insert_publish_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -110,6 +113,7 @@ def insert_publish_resources(default_owner_id): else: continue + def insert_geno_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -139,6 +143,7 @@ def insert_geno_resources(default_owner_id): add_resource(resource_ob, update=False) + def insert_resources(default_owner_id): current_resources = get_resources() print("START") @@ -149,6 +154,7 @@ def insert_resources(default_owner_id): insert_probeset_resources(default_owner_id) print("AFTER PROBESET") + def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" @@ -158,6 +164,7 @@ def main(): insert_resources(owner_id) + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() |