about summary refs log tree commit diff
path: root/wqflask/maintenance/convert_geno_to_bimbam.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance/convert_geno_to_bimbam.py')
-rw-r--r-- wqflask/maintenance/convert_geno_to_bimbam.py 47
1 files changed, 29 insertions, 18 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 528b98cf..078be529 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -9,7 +9,6 @@ code
 
 """
 
-from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
@@ -21,9 +20,12 @@ import simplejson as json
 
 from pprint import pformat as pf
 
-class EmptyConfigurations(Exception): pass
 
-class Marker(object):
+class EmptyConfigurations(Exception):
+    pass
+
+
+class Marker:
     def __init__(self):
         self.name = None
         self.chr = None
@@ -31,7 +33,8 @@ class Marker(object):
         self.Mb = None
         self.genotypes = []
 
-class ConvertGenoFile(object):
+
+class ConvertGenoFile:
 
     def __init__(self, input_file, output_files):
         self.input_file = input_file
@@ -53,7 +56,7 @@ class ConvertGenoFile(object):
             '@pat': "0",
             '@het': "0.5",
             '@unk': "NA"
-            }
+        }
 
         self.configurations = {}
         self.input_fh = open(self.input_file)
@@ -81,13 +84,14 @@ class ConvertGenoFile(object):
                 genotypes = row_items[2:]
             for item_count, genotype in enumerate(genotypes):
                 if genotype.upper().strip() in self.configurations:
-                    this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+                    this_marker.genotypes.append(
+                        self.configurations[genotype.upper().strip()])
                 else:
                     this_marker.genotypes.append("NA")
 
             self.markers.append(this_marker.__dict__)
 
-        self.write_to_bimbam()    
+        self.write_to_bimbam()
 
     def write_to_bimbam(self):
         with open(self.output_files[0], "w") as geno_fh:
@@ -104,9 +108,11 @@ class ConvertGenoFile(object):
         with open(self.output_files[2], "w") as snp_fh:
             for marker in self.markers:
                 if self.mb_exists:
-                    snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n")
+                    snp_fh.write(
+                        marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
                 else:
-                    snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n")
+                    snp_fh.write(
+                        marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
 
     def get_sample_list(self, row_contents):
         self.sample_list = []
@@ -120,7 +126,7 @@ class ConvertGenoFile(object):
                 self.sample_list = row_contents[3:]
             else:
                 self.sample_list = row_contents[2:]
-    
+
     def process_rows(self):
         for self.latest_row_pos, row in enumerate(self.input_fh):
             self.latest_row_value = row
@@ -158,10 +164,14 @@ class ConvertGenoFile(object):
             group_name = ".".join(input_file.split('.')[:-1])
             if group_name == "HSNIH-Palmer":
                 continue
-            geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
-            pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
-            snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
-            output_files = [geno_output_file, pheno_output_file, snp_output_file]
+            geno_output_file = os.path.join(
+                new_directory, group_name + "_geno.txt")
+            pheno_output_file = os.path.join(
+                new_directory, group_name + "_pheno.txt")
+            snp_output_file = os.path.join(
+                new_directory, group_name + "_snps.txt")
+            output_files = [geno_output_file,
+                            pheno_output_file, snp_output_file]
             print("%s -> %s" % (
                 os.path.join(old_directory, input_file), geno_output_file))
             convertob = ConvertGenoFile(input_file, output_files)
@@ -174,17 +184,18 @@ class ConvertGenoFile(object):
                 print("  Exception:", why)
                 print(traceback.print_exc())
                 print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                convertob.latest_col_pos))
+                                                                    convertob.latest_col_pos))
                 print("    Column is:", convertob.latest_col_value)
                 print("    Row is:", convertob.latest_row_value)
                 break
 
-if __name__=="__main__":
+
+if __name__ == "__main__":
     Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
     New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
     #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
     #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
     #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
-    #convertob.convert()
+    # convertob.convert()
     ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
-    #ConvertGenoFiles(Geno_Directory)
\ No newline at end of file
+    # ConvertGenoFiles(Geno_Directory)