diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/convert_dryad_to_bimbam.py | 70 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 2 | ||||
-rw-r--r-- | wqflask/maintenance/quantile_normalize.py | 4 |
3 files changed, 73 insertions, 3 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py new file mode 100644 index 00000000..e833b395 --- /dev/null +++ b/wqflask/maintenance/convert_dryad_to_bimbam.py @@ -0,0 +1,70 @@ +#!/usr/bin/python + +""" +Convert data dryad files to a BIMBAM _geno and _snps file + + +""" + +from __future__ import print_function, division, absolute_import +import sys +sys.path.append("..") + + +def read_dryad_file(filename): + exclude_count = 0 + marker_list = [] + sample_dict = {} + sample_list = [] + geno_rows = [] + with open(filename, 'r') as the_file: + for i, line in enumerate(the_file): + if i > 0: + if line.split(" ")[1] == "no": + sample_name = line.split(" ")[0] + sample_list.append(sample_name) + sample_dict[sample_name] = line.split(" ")[2:] + else: + exclude_count += 1 + else: + marker_list = line.split(" ")[2:] + + for i, marker in enumerate(marker_list): + this_row = [] + this_row.append(marker) + this_row.append("X") + this_row.append("Y") + for sample in sample_list: + this_row.append(sample_dict[sample][i]) + geno_rows.append(this_row) + + print(exclude_count) + + return geno_rows + + #for i, marker in enumerate(marker_list): + # this_row = [] + # this_row.append(marker) + # this_row.append("X") + # this_row.append("Y") + # with open(filename, 'r') as the_file: + # for j, line in enumerate(the_file): + # if j > 0: + # this_row.append(line.split(" ")[i+2]) + # print("row: " + str(i)) + # geno_rows.append(this_row) + # + #return geno_rows + +def write_bimbam_files(geno_rows): + with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: + for row in geno_rows: + geno_fh.write(", ".join(row) + "\n") + +def convert_dryad_to_bimbam(filename): + geno_file_rows = read_dryad_file(filename) + write_bimbam_files(geno_file_rows) + +if __name__=="__main__": + input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" + convert_dryad_to_bimbam(input_filename)
\ No newline at end of file diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 2825c6ea..18b2dac9 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -63,7 +63,7 @@ from pprint import pformat as pf #conn = Engine.connect() -def parse_db_uri(db_uri): +def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urlparse.urlparse(SQL_URI) diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index c11073fb..41a3aad8 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -59,7 +59,7 @@ def set_data(dataset_name): orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" sample_list = [] - with open(orig_file, 'r') as orig_fh, open('quant_norm.csv', 'r') as quant_fh: + with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh: for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)): trait_dict = {} sample_list = [] @@ -118,7 +118,7 @@ if __name__ == '__main__': #out_filename = sys.argv[1][:-4] + '_quantnorm.txt' - #success, _ = bulk(es, set_data(sys.argv[1])) + success, _ = bulk(es, set_data(sys.argv[1])) response = es.search( index = "traits", doc_type = "trait", body = { |