1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
from __future__ import absolute_import, division, print_function
class genotype(object):
"""
Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure
"""
def __init__(self, filename):
self.group = None
self.type = "riset"
self.prgy = []
self.nprgy = 0
self.mat = -1
self.pat = 1
self.het = 0
self.unk = "U"
self.filler = False
self.mb_exists = False
#ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary
self.cm_column = 2
self.mb_column = 3
self.chromosomes = []
self.read_file(filename)
def __iter__(self):
return iter(self.chromosomes)
def __getitem__(self, index):
return self.chromosomes[index]
def __len__(self):
return len(self.chromosomes)
def read_file(self, filename):
with open(filename, 'r') as geno_file:
lines = geno_file.readlines()
this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers
chr_ob = None
for line in lines:
if line[0] == "#":
continue
elif line[0] == "@":
label = line.split(":")[0][1:]
if label == "name":
self.group = line.split(":")[1]
elif label == "filler":
if line.split(":")[1] == "yes":
self.filler = True
elif label == "type":
self.type = line.split(":")[1]
elif label == "mat":
self.mat = line.split(":")[1]
elif label == "pat":
self.pat = line.split(":")[1]
elif label == "het":
self.het = line.split(":")[1]
elif label == "unk":
self.unk = line.split(":")[1]
else:
continue
elif line[:3] == "Chr":
header_row = line.split("\t")
if header_row[2] == "Mb":
self.mb_exists = True
self.mb_column = 2
self.cm_column = 3
elif header_row[3] == "Mb":
self.mb_exists = True
self.mb_column = 3
elif header_row[2] == "cM":
self.cm_column = 2
if self.mb_exists:
self.prgy = header_row[4:]
else:
self.prgy = header_row[3:]
self.nprgy = len(self.prgy)
else:
if line.split("\t")[0] != this_chr:
if this_chr != "":
self.chromosomes.append(chr_ob)
this_chr = line.split("\t")[0]
chr_ob = Chr(line.split("\t")[0], self)
chr_ob.add_marker(line.split("\t"))
self.chromosomes.append(chr_ob)
class Chr(object):
def __init__(self, name, geno_ob):
self.name = name
self.loci = []
self.mb_exists = geno_ob.mb_exists
self.cm_column = geno_ob.cm_column
self.mb_column = geno_ob.mb_column
self.geno_ob = geno_ob
def __iter__(self):
return iter(self.loci)
def __getitem__(self, index):
return self.loci[index]
def __len__(self):
return len(self.loci)
def add_marker(self, marker_row):
self.loci.append(Locus(marker_row, self.geno_ob))
class Locus(object):
def __init__(self, marker_row, geno_ob):
self.chr = marker_row[0]
self.name = marker_row[1]
self.cM = float(marker_row[geno_ob.cm_column])
self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None
geno_table = {
geno_ob.mat: -1,
geno_ob.pat: 1,
geno_ob.het: 0,
geno_ob.unk: "U"
}
self.genotype = []
if geno_ob.mb_exists:
start_pos = 4
else:
start_pos = 3
for allele in marker_row[start_pos:]:
if allele in geno_table.keys():
self.genotype.append(geno_table[allele])
else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown
self.genotype.append("U")
|