about summary refs log tree commit diff
path: root/wqflask/maintenance/convert_geno_to_bimbam.py
diff options
context:
space:
mode:
author Arthur Centeno 2021-10-25 21:04:23 +0000
committer Arthur Centeno 2021-10-25 21:04:23 +0000
commit 499a80f138030c4de1629c043c8f9401a99894ea (patch)
tree 449dcae965d13f966fb6d52625fbc86661c8c6a0 /wqflask/maintenance/convert_geno_to_bimbam.py
parent 6151faa9ea67af4bf4ea95fb681a9dc4319474b6 (diff)
parent 700802303e5e8221a9d591ba985d6607aa61e1ce (diff)
download genenetwork2-499a80f138030c4de1629c043c8f9401a99894ea.tar.gz
Merge github.com:genenetwork/genenetwork2 into acenteno
Diffstat (limited to 'wqflask/maintenance/convert_geno_to_bimbam.py')
-rw-r--r-- wqflask/maintenance/convert_geno_to_bimbam.py 47
1 files changed, 29 insertions, 18 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 528b98cf..078be529 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -9,7 +9,6 @@ code
"""
-from __future__ import print_function, division, absolute_import
import sys
sys.path.append("..")
import os
@@ -21,9 +20,12 @@ import simplejson as json
from pprint import pformat as pf
-class EmptyConfigurations(Exception): pass
-class Marker(object):
+class EmptyConfigurations(Exception):
+ pass
+
+
+class Marker:
def __init__(self):
self.name = None
self.chr = None
@@ -31,7 +33,8 @@ class Marker(object):
self.Mb = None
self.genotypes = []
-class ConvertGenoFile(object):
+
+class ConvertGenoFile:
def __init__(self, input_file, output_files):
self.input_file = input_file
@@ -53,7 +56,7 @@ class ConvertGenoFile(object):
'@pat': "0",
'@het': "0.5",
'@unk': "NA"
- }
+ }
self.configurations = {}
self.input_fh = open(self.input_file)
@@ -81,13 +84,14 @@ class ConvertGenoFile(object):
genotypes = row_items[2:]
for item_count, genotype in enumerate(genotypes):
if genotype.upper().strip() in self.configurations:
- this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+ this_marker.genotypes.append(
+ self.configurations[genotype.upper().strip()])
else:
this_marker.genotypes.append("NA")
self.markers.append(this_marker.__dict__)
- self.write_to_bimbam()
+ self.write_to_bimbam()
def write_to_bimbam(self):
with open(self.output_files[0], "w") as geno_fh:
@@ -104,9 +108,11 @@ class ConvertGenoFile(object):
with open(self.output_files[2], "w") as snp_fh:
for marker in self.markers:
if self.mb_exists:
- snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n")
+ snp_fh.write(
+ marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
else:
- snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n")
+ snp_fh.write(
+ marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
def get_sample_list(self, row_contents):
self.sample_list = []
@@ -120,7 +126,7 @@ class ConvertGenoFile(object):
self.sample_list = row_contents[3:]
else:
self.sample_list = row_contents[2:]
-
+
def process_rows(self):
for self.latest_row_pos, row in enumerate(self.input_fh):
self.latest_row_value = row
@@ -158,10 +164,14 @@ class ConvertGenoFile(object):
group_name = ".".join(input_file.split('.')[:-1])
if group_name == "HSNIH-Palmer":
continue
- geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
- pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
- snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
- output_files = [geno_output_file, pheno_output_file, snp_output_file]
+ geno_output_file = os.path.join(
+ new_directory, group_name + "_geno.txt")
+ pheno_output_file = os.path.join(
+ new_directory, group_name + "_pheno.txt")
+ snp_output_file = os.path.join(
+ new_directory, group_name + "_snps.txt")
+ output_files = [geno_output_file,
+ pheno_output_file, snp_output_file]
print("%s -> %s" % (
os.path.join(old_directory, input_file), geno_output_file))
convertob = ConvertGenoFile(input_file, output_files)
@@ -174,17 +184,18 @@ class ConvertGenoFile(object):
print(" Exception:", why)
print(traceback.print_exc())
print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
- convertob.latest_col_pos))
+ convertob.latest_col_pos))
print(" Column is:", convertob.latest_col_value)
print(" Row is:", convertob.latest_row_value)
break
-if __name__=="__main__":
+
+if __name__ == "__main__":
Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
#Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
#Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
#convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
- #convertob.convert()
+ # convertob.convert()
ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
- #ConvertGenoFiles(Geno_Directory)
\ No newline at end of file
+ # ConvertGenoFiles(Geno_Directory)