about | summary | refs | log | tree | commit | diff
path: root/wqflask/maintenance/geno_to_json.py
diff options
context:
space:
mode:
author zsloan 2021-10-18 17:50:26 +0000
committer zsloan 2021-10-18 17:50:26 +0000
commit e36eaf0003a598bc5aa688803dd1b36c24a4c051 (patch)
tree a59b7dadf02241575eb0774f97c6048e2425c053 /wqflask/maintenance/geno_to_json.py
parent bd421438f1f0b4de913fa40cd49cfcda27e6b16f (diff)
parent 04f3d13aceeaec2e52b94037d59f08ed6dc6a8bb (diff)
download genenetwork2-e36eaf0003a598bc5aa688803dd1b36c24a4c051.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into feature/remove_trait_creation_from_search
Diffstat (limited to 'wqflask/maintenance/geno_to_json.py')
-rw-r--r-- wqflask/maintenance/geno_to_json.py 82
1 files changed, 41 insertions, 41 deletions
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index 7e7fd241..32e0e34b 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -25,11 +25,12 @@ from pprint import pformat as pf
#from utility.tools import flat_files
-class EmptyConfigurations(Exception): pass
-
+class EmptyConfigurations(Exception):
+ pass
-class Marker(object):
+
+class Marker:
def __init__(self):
self.name = None
self.chr = None
@@ -37,23 +38,24 @@ class Marker(object):
self.Mb = None
self.genotypes = []
-class ConvertGenoFile(object):
+
+class ConvertGenoFile:
def __init__(self, input_file, output_file):
-
+
self.input_file = input_file
self.output_file = output_file
-
+
self.mb_exists = False
self.cm_exists = False
self.markers = []
-
+
self.latest_row_pos = None
self.latest_col_pos = None
-
+
self.latest_row_value = None
self.latest_col_value = None
-
+
def convert(self):
self.haplotype_notation = {
@@ -61,24 +63,23 @@ class ConvertGenoFile(object):
'@pat': "0",
'@het': "0.5",
'@unk': "NA"
- }
-
+ }
+
self.configurations = {}
#self.skipped_cols = 3
-
- #if self.input_file.endswith(".geno.gz"):
+
+ # if self.input_file.endswith(".geno.gz"):
# print("self.input_file: ", self.input_file)
# self.input_fh = gzip.open(self.input_file)
- #else:
+ # else:
self.input_fh = open(self.input_file)
-
+
with open(self.output_file, "w") as self.output_fh:
- #if self.file_type == "geno":
+ # if self.file_type == "geno":
self.process_csv()
- #elif self.file_type == "snps":
+ # elif self.file_type == "snps":
# self.process_snps_file()
-
def process_csv(self):
for row_count, row in enumerate(self.process_rows()):
row_items = row.split("\t")
@@ -100,31 +101,31 @@ class ConvertGenoFile(object):
genotypes = row_items[2:]
for item_count, genotype in enumerate(genotypes):
if genotype.upper() in self.configurations:
- this_marker.genotypes.append(self.configurations[genotype.upper()])
+ this_marker.genotypes.append(
+ self.configurations[genotype.upper()])
else:
this_marker.genotypes.append("NA")
-
- #print("this_marker is:", pf(this_marker.__dict__))
- #if this_marker.chr == "14":
+
+ #print("this_marker is:", pf(this_marker.__dict__))
+ # if this_marker.chr == "14":
self.markers.append(this_marker.__dict__)
with open(self.output_file, 'w') as fh:
json.dump(self.markers, fh, indent=" ", sort_keys=True)
-
- # print('configurations:', str(configurations))
- #self.latest_col_pos = item_count + self.skipped_cols
- #self.latest_col_value = item
-
- #if item_count != 0:
- # self.output_fh.write(" ")
- #self.output_fh.write(self.configurations[item.upper()])
-
- #self.output_fh.write("\n")
+ # print('configurations:', str(configurations))
+ #self.latest_col_pos = item_count + self.skipped_cols
+ #self.latest_col_value = item
+
+ # if item_count != 0:
+ # self.output_fh.write(" ")
+ # self.output_fh.write(self.configurations[item.upper()])
+
+ # self.output_fh.write("\n")
def process_rows(self):
for self.latest_row_pos, row in enumerate(self.input_fh):
- #if self.input_file.endswith(".geno.gz"):
+ # if self.input_file.endswith(".geno.gz"):
# print("row: ", row)
self.latest_row_value = row
# Take care of headers
@@ -171,26 +172,25 @@ class ConvertGenoFile(object):
print(" Exception:", why)
print(traceback.print_exc())
print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
- convertob.latest_col_pos))
+ convertob.latest_col_pos))
print(" Column is:", convertob.latest_col_value)
print(" Row is:", convertob.latest_row_value)
break
-
- #def process_snps_file(cls, snps_file, new_directory):
+
+ # def process_snps_file(cls, snps_file, new_directory):
# output_file = os.path.join(new_directory, "mouse_families.json")
# print("%s -> %s" % (snps_file, output_file))
# convertob = ConvertGenoFile(input_file, output_file)
-
-if __name__=="__main__":
+if __name__ == "__main__":
Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
#Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
#Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
#convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
- #convertob.convert()
+ # convertob.convert()
ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
- #ConvertGenoFiles(Geno_Directory)
-
+ # ConvertGenoFiles(Geno_Directory)
+
#process_csv(Input_File, Output_File)