diff options
author | zsloan | 2018-05-25 15:52:40 +0000 |
---|---|---|
committer | zsloan | 2018-05-25 15:52:40 +0000 |
commit | 9bb60bb18ae5ac70fe480095554796b7c18f1b6c (patch) | |
tree | 58e1882cdb742e7d4a8e793a2545902fec803ece /wqflask/maintenance/convert_dryad_to_bimbam.py | |
parent | 67e8f12e103f48329d8b3e38125c0e84b9dc089d (diff) | |
download | genenetwork2-9bb60bb18ae5ac70fe480095554796b7c18f1b6c.tar.gz |
Fixed an issue that prevented anonymous collections from working on my branch and on staging, though I'm still not sure why they work on production without that change
Added script to convert the dryad format genotype files to BIMBAM
Removed db_uri from the parameters of parse_db_uri in gen_select_dataset.py, since it can now be pulled from settings as a global variable
Diffstat (limited to 'wqflask/maintenance/convert_dryad_to_bimbam.py')
-rw-r--r-- | wqflask/maintenance/convert_dryad_to_bimbam.py | 70 |
1 files changed, 70 insertions, 0 deletions
#!/usr/bin/python
# File: wqflask/maintenance/convert_dryad_to_bimbam.py

"""
Convert a data dryad genotype file to a BIMBAM _geno file.

The input is read as a space-separated text file with one header line and
one line per sample; the output has one comma-separated line per marker.
Only the genotype file is written by this script (no _snps file is
produced by the code below).

Usage:
    python convert_dryad_to_bimbam.py NAME

which reads /home/zas1024/cfw_data/NAME.txt and writes
/home/zas1024/cfw_data/CFW_geno.txt.
"""

from __future__ import print_function, division, absolute_import
import sys
sys.path.append("..")

# Directory the command-line NAME argument is resolved against.
INPUT_DIR = "/home/zas1024/cfw_data/"

# Output file used when the caller does not supply one (the location this
# script originally hard-coded).
DEFAULT_GENO_FILENAME = '/home/zas1024/cfw_data/CFW_geno.txt'


def read_dryad_file(filename):
    """Read a dryad genotype file and return it transposed, one row per marker.

    Input layout, as parsed here (fields are separated by a single space):
      * line 1 (header): the first two fields are ignored, the remaining
        fields are the marker names;
      * every other line: field 0 is the sample name, field 1 is a flag,
        and the remaining fields are that sample's genotype values, in the
        same order as the header's markers.

    Only samples whose flag is exactly "no" are kept (presumably the flag
    is a discard/exclude column -- confirm against the data set). The
    number of skipped samples is printed to stdout.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one entry per marker. Each entry is a list of strings:
        the marker name, the placeholders "X" and "Y" (presumably standing
        in for the two allele columns of the BIMBAM format -- confirm),
        then one genotype value per kept sample in file order.

    Raises:
        IndexError: If a kept sample has fewer genotype values than there
            are markers in the header.
    """
    exclude_count = 0
    marker_list = []
    sample_list = []
    sample_dict = {}

    with open(filename, 'r') as the_file:
        for line_number, line in enumerate(the_file):
            # Strip the line terminator before splitting; otherwise the last
            # marker name and the last genotype of every sample would carry
            # a trailing newline into the output rows.
            fields = line.rstrip("\r\n").split(" ")

            if line_number == 0:
                marker_list = fields[2:]
                continue

            # Tolerate blank lines (e.g. a trailing empty line at EOF)
            # instead of failing on the missing flag field.
            if fields == [""]:
                continue

            if fields[1] == "no":
                sample_name = fields[0]
                sample_list.append(sample_name)
                sample_dict[sample_name] = fields[2:]
            else:
                exclude_count += 1

    # Transpose: the input is sample-major, the output is marker-major.
    geno_rows = []
    for marker_index, marker in enumerate(marker_list):
        this_row = [marker, "X", "Y"]
        this_row.extend(sample_dict[sample][marker_index]
                        for sample in sample_list)
        geno_rows.append(this_row)

    print(exclude_count)

    return geno_rows


def write_bimbam_files(geno_rows, geno_filename=DEFAULT_GENO_FILENAME):
    """Write marker rows to a genotype file, one ", "-joined line per row.

    Args:
        geno_rows: Iterable of rows, each a sequence of strings (as
            returned by read_dryad_file).
        geno_filename: Path of the file to (over)write. Defaults to the
            location this script has always written to.
    """
    with open(geno_filename, 'w') as geno_fh:
        geno_fh.writelines(", ".join(row) + "\n" for row in geno_rows)


def convert_dryad_to_bimbam(filename, geno_filename=DEFAULT_GENO_FILENAME):
    """Convert the dryad file at `filename` and write the genotype file.

    Args:
        filename: Path of the dryad input file.
        geno_filename: Path of the output file; defaults to
            DEFAULT_GENO_FILENAME.
    """
    geno_file_rows = read_dryad_file(filename)
    write_bimbam_files(geno_file_rows, geno_filename)


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: Argument list including the program name; defaults to
            sys.argv.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Report a missing argument explicitly rather than failing with a
        # bare IndexError on argv[1].
        print("Usage: {0} NAME  (reads {1}NAME.txt)".format(argv[0] if argv else "convert_dryad_to_bimbam.py", INPUT_DIR),
              file=sys.stderr)
        return 1

    input_filename = INPUT_DIR + argv[1] + ".txt"
    convert_dryad_to_bimbam(input_filename)
    return 0


if __name__ == "__main__":
    sys.exit(main())
\ No newline at end of file |