about summary refs log tree commit diff
path: root/wqflask/maintenance/geno_to_json.py
diff options
context:
space:
mode:
author: Arthur Centeno, 2021-10-25 21:04:23 +0000
committer: Arthur Centeno, 2021-10-25 21:04:23 +0000
commit: 499a80f138030c4de1629c043c8f9401a99894ea (patch)
tree: 449dcae965d13f966fb6d52625fbc86661c8c6a0 /wqflask/maintenance/geno_to_json.py
parent: 6151faa9ea67af4bf4ea95fb681a9dc4319474b6 (diff)
parent: 700802303e5e8221a9d591ba985d6607aa61e1ce (diff)
download: genenetwork2-499a80f138030c4de1629c043c8f9401a99894ea.tar.gz
Merge github.com:genenetwork/genenetwork2 into acenteno
Diffstat (limited to 'wqflask/maintenance/geno_to_json.py')
-rw-r--r-- wqflask/maintenance/geno_to_json.py 85
1 files changed, 42 insertions, 43 deletions
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index 9579812a..32e0e34b 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -9,7 +9,6 @@ code
 
 """
 
-from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
@@ -26,11 +25,12 @@ from pprint import pformat as pf
 
 #from utility.tools import flat_files
 
-class EmptyConfigurations(Exception): pass
 
-        
+class EmptyConfigurations(Exception):
+    pass
 
-class Marker(object):
+
+class Marker:
     def __init__(self):
         self.name = None
         self.chr = None
@@ -38,23 +38,24 @@ class Marker(object):
         self.Mb = None
         self.genotypes = []
 
-class ConvertGenoFile(object):
+
+class ConvertGenoFile:
 
     def __init__(self, input_file, output_file):
-        
+
         self.input_file = input_file
         self.output_file = output_file
-        
+
         self.mb_exists = False
         self.cm_exists = False
         self.markers = []
-        
+
         self.latest_row_pos = None
         self.latest_col_pos = None
-        
+
         self.latest_row_value = None
         self.latest_col_value = None
-        
+
     def convert(self):
 
         self.haplotype_notation = {
@@ -62,24 +63,23 @@ class ConvertGenoFile(object):
             '@pat': "0",
             '@het': "0.5",
             '@unk': "NA"
-            }
-        
+        }
+
         self.configurations = {}
         #self.skipped_cols = 3
-        
-        #if self.input_file.endswith(".geno.gz"):
+
+        # if self.input_file.endswith(".geno.gz"):
         #    print("self.input_file: ", self.input_file)
         #    self.input_fh = gzip.open(self.input_file)
-        #else:
+        # else:
         self.input_fh = open(self.input_file)
-        
+
         with open(self.output_file, "w") as self.output_fh:
-            #if self.file_type == "geno":
+            # if self.file_type == "geno":
             self.process_csv()
-            #elif self.file_type == "snps":
+            # elif self.file_type == "snps":
             #    self.process_snps_file()
 
-
     def process_csv(self):
         for row_count, row in enumerate(self.process_rows()):
             row_items = row.split("\t")
@@ -101,31 +101,31 @@ class ConvertGenoFile(object):
                 genotypes = row_items[2:]
             for item_count, genotype in enumerate(genotypes):
                 if genotype.upper() in self.configurations:
-                    this_marker.genotypes.append(self.configurations[genotype.upper()])
+                    this_marker.genotypes.append(
+                        self.configurations[genotype.upper()])
                 else:
                     this_marker.genotypes.append("NA")
-                
-            #print("this_marker is:", pf(this_marker.__dict__))   
-            #if this_marker.chr == "14":
+
+            #print("this_marker is:", pf(this_marker.__dict__))
+            # if this_marker.chr == "14":
             self.markers.append(this_marker.__dict__)
 
         with open(self.output_file, 'w') as fh:
             json.dump(self.markers, fh, indent="   ", sort_keys=True)
-                
-                # print('configurations:', str(configurations))
-                #self.latest_col_pos = item_count + self.skipped_cols
-                #self.latest_col_value = item
-                
-                #if item_count != 0:
-                #    self.output_fh.write(" ")
-                #self.output_fh.write(self.configurations[item.upper()])
-                    
-            #self.output_fh.write("\n")
 
+            # print('configurations:', str(configurations))
+            #self.latest_col_pos = item_count + self.skipped_cols
+            #self.latest_col_value = item
+
+            # if item_count != 0:
+            #    self.output_fh.write(" ")
+            # self.output_fh.write(self.configurations[item.upper()])
+
+            # self.output_fh.write("\n")
 
     def process_rows(self):
         for self.latest_row_pos, row in enumerate(self.input_fh):
-            #if self.input_file.endswith(".geno.gz"):
+            # if self.input_file.endswith(".geno.gz"):
             #    print("row: ", row)
             self.latest_row_value = row
             # Take care of headers
@@ -172,26 +172,25 @@ class ConvertGenoFile(object):
                 print("  Exception:", why)
                 print(traceback.print_exc())
                 print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                convertob.latest_col_pos))
+                                                                    convertob.latest_col_pos))
                 print("    Column is:", convertob.latest_col_value)
                 print("    Row is:", convertob.latest_row_value)
                 break
-            
-    #def process_snps_file(cls, snps_file, new_directory):
+
+    # def process_snps_file(cls, snps_file, new_directory):
     #    output_file = os.path.join(new_directory, "mouse_families.json")
     #    print("%s -> %s" % (snps_file, output_file))
     #    convertob = ConvertGenoFile(input_file, output_file)
-        
 
 
-if __name__=="__main__":
+if __name__ == "__main__":
     Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
     New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
     #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
     #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
     #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
-    #convertob.convert()
+    # convertob.convert()
     ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
-    #ConvertGenoFiles(Geno_Directory)
-    
-    #process_csv(Input_File, Output_File)
\ No newline at end of file
+    # ConvertGenoFiles(Geno_Directory)
+
+    #process_csv(Input_File, Output_File)