From 66c6bbfcbbd5fb23145ec09956f4809e5f701bec Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 5 Jun 2019 13:01:36 -0500 Subject: Fixed issue that caused interval mapping to not work because the python implementation of the reaper Dataset object doesn't include the addinterval method (so for those situations I still use reaper) Fixed issue where the last chromosome wasn't displayed for mapping results (though still need to fix issue where points are drawn too far to the right when a specific range is viewed) --- wqflask/utility/gen_geno_ob.py | 270 +++++++++++++++++++++-------------------- 1 file changed, 136 insertions(+), 134 deletions(-) (limited to 'wqflask/utility/gen_geno_ob.py') diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 5824b0b3..5172369f 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,135 +1,137 @@ -from __future__ import absolute_import, division, print_function - -class genotype(object): - """ - Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure - """ - - def __init__(self, filename): - self.group = None - self.type = "riset" - self.prgy = [] - self.nprgy = 0 - self.mat = -1 - self.pat = 1 - self.het = 0 - self.unk = "U" - self.filler = False - self.mb_exists = False - - #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary - self.cm_column = 2 - self.mb_column = 3 - - self.chromosomes = [] - - self.read_file(filename) - - def __iter__(self): - return iter(self.chromosomes) - - def __getitem__(self, index): - return self.chromosomes[index] - - def __len__(self): - return len(self.chromosomes) - - def read_file(self, filename): - - with open(filename, 'r') as geno_file: - lines = geno_file.readlines() - - this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers - chr_ob = None - for line in lines: - if line[0] == "#": - continue - elif line[0] == "@": - label = line.split(":")[0][1:] - if label == "name": - self.group = line.split(":")[1] - elif label == "filler": - if line.split(":")[1] == "yes": - self.filler = True - elif label == "type": - self.type = line.split(":")[1] - elif label == "mat": - self.mat = line.split(":")[1] - elif label == "pat": - self.pat = line.split(":")[1] - elif label == "het": - self.het = line.split(":")[1] - elif label == "unk": - self.unk = line.split(":")[1] - else: - continue - elif line[:3] == "Chr": - header_row = line.split("\t") - if header_row[2] == "Mb": - self.mb_exists = True - self.mb_column = 2 - self.cm_column = 3 - elif header_row[3] == "Mb": - self.mb_exists = True - self.mb_column = 3 - elif header_row[2] == "cM": - self.cm_column = 2 - - if self.mb_exists: - self.prgy = header_row[4:] - else: - self.prgy = header_row[3:] - self.nprgy = len(self.prgy) - else: - if line.split("\t")[0] != this_chr: - if this_chr != "": - self.chromosomes.append(chr_ob) - this_chr = line.split("\t")[0] - chr_ob = Chr(line.split("\t")[0], self) - chr_ob.add_marker(line.split("\t")) - -class Chr(object): - def __init__(self, name, geno_ob): - self.name = name - self.loci = [] - self.mb_exists = geno_ob.mb_exists - self.cm_column = geno_ob.cm_column - self.mb_column = geno_ob.mb_column - self.geno_ob = geno_ob - - def __iter__(self): - return iter(self.loci) - - def __getitem__(self, index): - return self.loci[index] - - def __len__(self): - return len(self.loci) - - def add_marker(self, marker_row): - self.loci.append(Locus(marker_row, self.geno_ob)) - -class Locus(object): - def __init__(self, marker_row, geno_ob): - self.chr = marker_row[0] - self.name = marker_row[1] - self.cM = float(marker_row[geno_ob.cm_column]) - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None - - geno_table = { - geno_ob.mat: -1, - geno_ob.pat: 1, - geno_ob.het: 0, - geno_ob.unk: "U" - } - - self.genotype = [] - if geno_ob.mb_exists: - start_pos = 4 - else: - start_pos = 3 - for allele in marker_row[start_pos:]: - if allele in geno_table.keys(): - self.genotype.append(geno_table[allele]) - else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown +from __future__ import absolute_import, division, print_function + +class genotype(object): + """ + Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure + """ + + def __init__(self, filename): + self.group = None + self.type = "riset" + self.prgy = [] + self.nprgy = 0 + self.mat = -1 + self.pat = 1 + self.het = 0 + self.unk = "U" + self.filler = False + self.mb_exists = False + + #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary + self.cm_column = 2 + self.mb_column = 3 + + self.chromosomes = [] + + self.read_file(filename) + + def __iter__(self): + return iter(self.chromosomes) + + def __getitem__(self, index): + return self.chromosomes[index] + + def __len__(self): + return len(self.chromosomes) + + def read_file(self, filename): + + with open(filename, 'r') as geno_file: + lines = geno_file.readlines() + + this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + chr_ob = None + for line in lines: + if line[0] == "#": + continue + elif line[0] == "@": + label = line.split(":")[0][1:] + if label == "name": + self.group = line.split(":")[1] + elif label == "filler": + if line.split(":")[1] == "yes": + self.filler = True + elif label == "type": + self.type = line.split(":")[1] + elif label == "mat": + self.mat = line.split(":")[1] + elif label == "pat": + self.pat = line.split(":")[1] + elif label == "het": + self.het = line.split(":")[1] + elif label == "unk": + self.unk = line.split(":")[1] + else: + continue + elif line[:3] == "Chr": + header_row = line.split("\t") + if header_row[2] == "Mb": + self.mb_exists = True + self.mb_column = 2 + self.cm_column = 3 + elif header_row[3] == "Mb": + self.mb_exists = True + self.mb_column = 3 + elif header_row[2] == "cM": + self.cm_column = 2 + + if self.mb_exists: + self.prgy = header_row[4:] + else: + self.prgy = header_row[3:] + self.nprgy = len(self.prgy) + else: + if line.split("\t")[0] != this_chr: + if this_chr != "": + self.chromosomes.append(chr_ob) + this_chr = line.split("\t")[0] + chr_ob = Chr(line.split("\t")[0], self) + chr_ob.add_marker(line.split("\t")) + + self.chromosomes.append(chr_ob) + +class Chr(object): + def __init__(self, name, geno_ob): + self.name = name + self.loci = [] + self.mb_exists = geno_ob.mb_exists + self.cm_column = geno_ob.cm_column + self.mb_column = geno_ob.mb_column + self.geno_ob = geno_ob + + def __iter__(self): + return iter(self.loci) + + def __getitem__(self, index): + return self.loci[index] + + def __len__(self): + return len(self.loci) + + def add_marker(self, marker_row): + self.loci.append(Locus(marker_row, self.geno_ob)) + +class Locus(object): + def __init__(self, marker_row, geno_ob): + self.chr = marker_row[0] + self.name = marker_row[1] + self.cM = float(marker_row[geno_ob.cm_column]) + self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + + geno_table = { + geno_ob.mat: -1, + geno_ob.pat: 1, + geno_ob.het: 0, + geno_ob.unk: "U" + } + + self.genotype = [] + if geno_ob.mb_exists: + start_pos = 4 + else: + start_pos = 3 + for allele in marker_row[start_pos:]: + if allele in geno_table.keys(): + self.genotype.append(geno_table[allele]) + else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown self.genotype.append("U") \ No newline at end of file -- cgit v1.2.3