diff options
Diffstat (limited to 'wqflask/maintenance/convert_geno_to_bimbam.py')
-rw-r--r-- | wqflask/maintenance/convert_geno_to_bimbam.py | 47 |
1 files changed, 29 insertions, 18 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 528b98cf..078be529 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -9,7 +9,6 @@ code """ -from __future__ import print_function, division, absolute_import import sys sys.path.append("..") import os @@ -21,9 +20,12 @@ import simplejson as json from pprint import pformat as pf -class EmptyConfigurations(Exception): pass -class Marker(object): +class EmptyConfigurations(Exception): + pass + + +class Marker: def __init__(self): self.name = None self.chr = None @@ -31,7 +33,8 @@ class Marker(object): self.Mb = None self.genotypes = [] -class ConvertGenoFile(object): + +class ConvertGenoFile: def __init__(self, input_file, output_files): self.input_file = input_file @@ -53,7 +56,7 @@ class ConvertGenoFile(object): '@pat': "0", '@het': "0.5", '@unk': "NA" - } + } self.configurations = {} self.input_fh = open(self.input_file) @@ -81,13 +84,14 @@ class ConvertGenoFile(object): genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) else: this_marker.genotypes.append("NA") self.markers.append(this_marker.__dict__) - self.write_to_bimbam() + self.write_to_bimbam() def write_to_bimbam(self): with open(self.output_files[0], "w") as geno_fh: @@ -104,9 +108,11 @@ class ConvertGenoFile(object): with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: - snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") else: - snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") def get_sample_list(self, row_contents): self.sample_list = [] @@ -120,7 +126,7 @@ class ConvertGenoFile(object): self.sample_list = row_contents[3:] else: self.sample_list = row_contents[2:] - + def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): self.latest_row_value = row @@ -158,10 +164,14 @@ class ConvertGenoFile(object): group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_output_file = os.path.join(new_directory, group_name + "_geno.txt") - pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt") - snp_output_file = os.path.join(new_directory, group_name + "_snps.txt") - output_files = [geno_output_file, pheno_output_file, snp_output_file] + geno_output_file = os.path.join( + new_directory, group_name + "_geno.txt") + pheno_output_file = os.path.join( + new_directory, group_name + "_pheno.txt") + snp_output_file = os.path.join( + new_directory, group_name + "_snps.txt") + output_files = [geno_output_file, + pheno_output_file, snp_output_file] print("%s -> %s" % ( os.path.join(old_directory, input_file), geno_output_file)) convertob = ConvertGenoFile(input_file, output_files) @@ -174,17 +184,18 @@ class ConvertGenoFile(object): print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break -if __name__=="__main__": + +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory)
\ No newline at end of file + # ConvertGenoFiles(Geno_Directory) |