diff options
Diffstat (limited to 'wqflask/utility/gen_geno_ob.py')
-rw-r--r-- | wqflask/utility/gen_geno_ob.py | 270 |
1 files changed, 136 insertions, 134 deletions
diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 5824b0b3..5172369f 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,135 +1,137 @@ -from __future__ import absolute_import, division, print_function
-
-class genotype(object):
- """
- Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure
- """
-
- def __init__(self, filename):
- self.group = None
- self.type = "riset"
- self.prgy = []
- self.nprgy = 0
- self.mat = -1
- self.pat = 1
- self.het = 0
- self.unk = "U"
- self.filler = False
- self.mb_exists = False
-
- #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary
- self.cm_column = 2
- self.mb_column = 3
-
- self.chromosomes = []
-
- self.read_file(filename)
-
- def __iter__(self):
- return iter(self.chromosomes)
-
- def __getitem__(self, index):
- return self.chromosomes[index]
-
- def __len__(self):
- return len(self.chromosomes)
-
- def read_file(self, filename):
-
- with open(filename, 'r') as geno_file:
- lines = geno_file.readlines()
-
- this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers
- chr_ob = None
- for line in lines:
- if line[0] == "#":
- continue
- elif line[0] == "@":
- label = line.split(":")[0][1:]
- if label == "name":
- self.group = line.split(":")[1]
- elif label == "filler":
- if line.split(":")[1] == "yes":
- self.filler = True
- elif label == "type":
- self.type = line.split(":")[1]
- elif label == "mat":
- self.mat = line.split(":")[1]
- elif label == "pat":
- self.pat = line.split(":")[1]
- elif label == "het":
- self.het = line.split(":")[1]
- elif label == "unk":
- self.unk = line.split(":")[1]
- else:
- continue
- elif line[:3] == "Chr":
- header_row = line.split("\t")
- if header_row[2] == "Mb":
- self.mb_exists = True
- self.mb_column = 2
- self.cm_column = 3
- elif header_row[3] == "Mb":
- self.mb_exists = True
- self.mb_column = 3
- elif header_row[2] == "cM":
- self.cm_column = 2
-
- if self.mb_exists:
- self.prgy = header_row[4:]
- else:
- self.prgy = header_row[3:]
- self.nprgy = len(self.prgy)
- else:
- if line.split("\t")[0] != this_chr:
- if this_chr != "":
- self.chromosomes.append(chr_ob)
- this_chr = line.split("\t")[0]
- chr_ob = Chr(line.split("\t")[0], self)
- chr_ob.add_marker(line.split("\t"))
-
-class Chr(object):
- def __init__(self, name, geno_ob):
- self.name = name
- self.loci = []
- self.mb_exists = geno_ob.mb_exists
- self.cm_column = geno_ob.cm_column
- self.mb_column = geno_ob.mb_column
- self.geno_ob = geno_ob
-
- def __iter__(self):
- return iter(self.loci)
-
- def __getitem__(self, index):
- return self.loci[index]
-
- def __len__(self):
- return len(self.loci)
-
- def add_marker(self, marker_row):
- self.loci.append(Locus(marker_row, self.geno_ob))
-
-class Locus(object):
- def __init__(self, marker_row, geno_ob):
- self.chr = marker_row[0]
- self.name = marker_row[1]
- self.cM = float(marker_row[geno_ob.cm_column])
- self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None
-
- geno_table = {
- geno_ob.mat: -1,
- geno_ob.pat: 1,
- geno_ob.het: 0,
- geno_ob.unk: "U"
- }
-
- self.genotype = []
- if geno_ob.mb_exists:
- start_pos = 4
- else:
- start_pos = 3
- for allele in marker_row[start_pos:]:
- if allele in geno_table.keys():
- self.genotype.append(geno_table[allele])
- else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown
from __future__ import absolute_import, division, print_function


class genotype(object):
    """
    Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure

    The object behaves like a sequence of Chr objects: iteration, indexing
    and len() all delegate to ``self.chromosomes``. Each Chr is in turn a
    sequence of Locus objects.

    Args:
        filename: Path of the tab-delimited genotype file to parse. The file
            is read immediately by the constructor.
    """

    # Metadata labels whose value is stored verbatim on the attribute of the
    # same name ("@mat:B" sets self.mat = "B").
    _VALUE_LABELS = ("type", "mat", "pat", "het", "unk")

    def __init__(self, filename):
        self.group = None       # value of the "@name" metadata line
        self.type = "riset"     # value of the "@type" metadata line
        self.prgy = []          # header labels that follow the position columns
        self.nprgy = 0          # len(self.prgy)
        # Symbols used in the file for each genotype class; overwritten by
        # the "@mat"/"@pat"/"@het"/"@unk" metadata lines when present.
        self.mat = -1
        self.pat = 1
        self.het = 0
        self.unk = "U"
        self.filler = False     # True when the file contains "@filler:yes"
        self.mb_exists = False  # True when the header row has an "Mb" column

        #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary
        self.cm_column = 2
        self.mb_column = 3

        self.chromosomes = []

        self.read_file(filename)

    def __iter__(self):
        return iter(self.chromosomes)

    def __getitem__(self, index):
        return self.chromosomes[index]

    def __len__(self):
        return len(self.chromosomes)

    def read_file(self, filename):
        """Parse ``filename`` and append one Chr per run of equal chromosome names.

        Lines are dispatched on their prefix: "#" lines are skipped, "@" lines
        are metadata, a line starting with "Chr" is the header row, and every
        other non-blank line is a marker row.

        Args:
            filename: Path of the genotype file.

        Raises:
            IOError/OSError: If the file cannot be opened.
            ValueError, IndexError: Propagated from Locus when a marker row
                has a non-numeric position or too few columns.
        """
        chr_ob = None  #ZS: This is so it can track when the chromosome changes as it iterates through markers
        with open(filename, 'r') as geno_file:
            for raw_line in geno_file:
                # Drop the line terminator up front; otherwise it ends up
                # glued to metadata values, the last header label and the
                # last allele of every row.
                line = raw_line.rstrip("\r\n")
                if not line.strip():
                    continue  # blank lines carry no marker data
                if line[0] == "#":
                    continue
                if line[0] == "@":
                    self._read_metadata(line)
                elif line[:3] == "Chr":
                    self._read_header(line.split("\t"))
                else:
                    marker_row = line.split("\t")
                    if chr_ob is None or marker_row[0] != chr_ob.name:
                        if chr_ob is not None:
                            self.chromosomes.append(chr_ob)
                        chr_ob = Chr(marker_row[0], self)
                    chr_ob.add_marker(marker_row)

        # The final chromosome is still pending when the loop ends; a file
        # without marker rows has none, so never append a None placeholder.
        if chr_ob is not None:
            self.chromosomes.append(chr_ob)

    def _read_metadata(self, line):
        """Apply one "@label:value" metadata line; unknown labels are ignored."""
        fields = line[1:].split(":")
        if len(fields) < 2:
            return  # no value present, nothing to record
        label = fields[0]
        value = fields[1].strip()
        if label == "name":
            self.group = value
        elif label == "filler":
            if value == "yes":
                self.filler = True
        elif label in self._VALUE_LABELS:
            setattr(self, label, value)

    def _read_header(self, header_row):
        """Record the position-column layout and the labels that follow it."""
        third = header_row[2] if len(header_row) > 2 else None
        fourth = header_row[3] if len(header_row) > 3 else None
        if third == "Mb":
            self.mb_exists = True
            self.mb_column = 2
            self.cm_column = 3
        elif fourth == "Mb":
            self.mb_exists = True
            self.mb_column = 3
        elif third == "cM":
            self.cm_column = 2

        # With an Mb column there are four leading columns, otherwise three.
        first_label = 4 if self.mb_exists else 3
        self.prgy = header_row[first_label:]
        self.nprgy = len(self.prgy)


class Chr(object):
    """A named chromosome holding its Locus objects in file order.

    Args:
        name: Chromosome name (first column of its marker rows).
        geno_ob: The owning genotype object; its column layout is copied at
            construction time and it is passed on to every Locus.
    """

    def __init__(self, name, geno_ob):
        self.name = name
        self.loci = []
        self.mb_exists = geno_ob.mb_exists
        self.cm_column = geno_ob.cm_column
        self.mb_column = geno_ob.mb_column
        self.geno_ob = geno_ob

    def __iter__(self):
        return iter(self.loci)

    def __getitem__(self, index):
        return self.loci[index]

    def __len__(self):
        return len(self.loci)

    def add_marker(self, marker_row):
        """Build a Locus from the split marker row and append it to ``loci``."""
        self.loci.append(Locus(marker_row, self.geno_ob))


class Locus(object):
    """A single marker: name, position(s) and numerically coded genotypes.

    Args:
        marker_row: The marker line split on tabs
            (chromosome, name, position column(s), then one allele per label).
        geno_ob: The genotype object supplying the column layout and the
            mat/pat/het/unk symbols.

    Raises:
        IndexError: If the row has fewer columns than the layout requires.
        ValueError: If a position column is not a valid float.
    """

    def __init__(self, marker_row, geno_ob):
        self.chr = marker_row[0]
        self.name = marker_row[1]
        self.cM = float(marker_row[geno_ob.cm_column])
        self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None

        geno_table = {
            geno_ob.mat: -1,
            geno_ob.pat: 1,
            geno_ob.het: 0,
            geno_ob.unk: "U"
        }

        start_pos = 4 if geno_ob.mb_exists else 3
        #ZS: Some genotype appears that isn't specified in the metadata, make it unknown
        # Alleles are stripped so a row that still carries its line terminator
        # (or stray padding) is decoded the same as a clean one.
        self.genotype = [geno_table.get(allele.strip(), "U")
                         for allele in marker_row[start_pos:]]
\ No newline at end of file |