diff options
author | zsloan | 2018-08-14 20:16:32 +0000 |
---|---|---|
committer | zsloan | 2018-08-14 20:16:32 +0000 |
commit | 838362c116b02c090dadeb76cda27e9902a6626a (patch) | |
tree | a6be104cc73e3bc9e271f9b5ca854dd32f3b810d /wqflask/maintenance/convert_dryad_to_bimbam.py | |
parent | 0bead53661ea701ffd9f9d565e4d2ecbbed81a8e (diff) | |
parent | 85defabb17ecdef1c7b8e92fa2e06b44d1e9ca49 (diff) | |
download | genenetwork2-838362c116b02c090dadeb76cda27e9902a6626a.tar.gz |
Merge branch 'testing' of https://github.com/genenetwork/genenetwork2 into production
Diffstat (limited to 'wqflask/maintenance/convert_dryad_to_bimbam.py')
-rw-r--r-- | wqflask/maintenance/convert_dryad_to_bimbam.py | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py new file mode 100644 index 00000000..e833b395 --- /dev/null +++ b/wqflask/maintenance/convert_dryad_to_bimbam.py @@ -0,0 +1,70 @@ +#!/usr/bin/python + +""" +Convert data dryad files to a BIMBAM _geno and _snps file + + +""" + +from __future__ import print_function, division, absolute_import +import sys +sys.path.append("..") + + +def read_dryad_file(filename): + exclude_count = 0 + marker_list = [] + sample_dict = {} + sample_list = [] + geno_rows = [] + with open(filename, 'r') as the_file: + for i, line in enumerate(the_file): + if i > 0: + if line.split(" ")[1] == "no": + sample_name = line.split(" ")[0] + sample_list.append(sample_name) + sample_dict[sample_name] = line.split(" ")[2:] + else: + exclude_count += 1 + else: + marker_list = line.split(" ")[2:] + + for i, marker in enumerate(marker_list): + this_row = [] + this_row.append(marker) + this_row.append("X") + this_row.append("Y") + for sample in sample_list: + this_row.append(sample_dict[sample][i]) + geno_rows.append(this_row) + + print(exclude_count) + + return geno_rows + + #for i, marker in enumerate(marker_list): + # this_row = [] + # this_row.append(marker) + # this_row.append("X") + # this_row.append("Y") + # with open(filename, 'r') as the_file: + # for j, line in enumerate(the_file): + # if j > 0: + # this_row.append(line.split(" ")[i+2]) + # print("row: " + str(i)) + # geno_rows.append(this_row) + # + #return geno_rows + +def write_bimbam_files(geno_rows): + with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: + for row in geno_rows: + geno_fh.write(", ".join(row) + "\n") + +def convert_dryad_to_bimbam(filename): + geno_file_rows = read_dryad_file(filename) + write_bimbam_files(geno_file_rows) + +if __name__=="__main__": + input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" + convert_dryad_to_bimbam(input_filename)
\ No newline at end of file |