From e304c2cfdf1b38da483efa607a2acd72b66ad0de Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 24 May 2019 11:35:12 -0500 Subject: Replaced qtlreaper Dataset object with python object --- wqflask/utility/gen_geno_ob.py | 135 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 wqflask/utility/gen_geno_ob.py (limited to 'wqflask/utility') diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py new file mode 100644 index 00000000..5824b0b3 --- /dev/null +++ b/wqflask/utility/gen_geno_ob.py @@ -0,0 +1,135 @@ +from __future__ import absolute_import, division, print_function + +class genotype(object): + """ + Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure + """ + + def __init__(self, filename): + self.group = None + self.type = "riset" + self.prgy = [] + self.nprgy = 0 + self.mat = -1 + self.pat = 1 + self.het = 0 + self.unk = "U" + self.filler = False + self.mb_exists = False + + #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary + self.cm_column = 2 + self.mb_column = 3 + + self.chromosomes = [] + + self.read_file(filename) + + def __iter__(self): + return iter(self.chromosomes) + + def __getitem__(self, index): + return self.chromosomes[index] + + def __len__(self): + return len(self.chromosomes) + + def read_file(self, filename): + + with open(filename, 'r') as geno_file: + lines = geno_file.readlines() + + this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + chr_ob = None + for line in lines: + if line[0] == "#": + continue + elif line[0] == "@": + label = line.split(":")[0][1:] + if label == "name": + self.group = line.split(":")[1] + elif label == "filler": + if line.split(":")[1] == "yes": + self.filler = True + elif label == "type": + self.type = line.split(":")[1] + elif label == "mat": + self.mat = line.split(":")[1] + elif label == "pat": + self.pat = line.split(":")[1] + elif label == "het": + self.het = line.split(":")[1] + elif label == "unk": + self.unk = line.split(":")[1] + else: + continue + elif line[:3] == "Chr": + header_row = line.split("\t") + if header_row[2] == "Mb": + self.mb_exists = True + self.mb_column = 2 + self.cm_column = 3 + elif header_row[3] == "Mb": + self.mb_exists = True + self.mb_column = 3 + elif header_row[2] == "cM": + self.cm_column = 2 + + if self.mb_exists: + self.prgy = header_row[4:] + else: + self.prgy = header_row[3:] + self.nprgy = len(self.prgy) + else: + if line.split("\t")[0] != this_chr: + if this_chr != "": + self.chromosomes.append(chr_ob) + this_chr = line.split("\t")[0] + chr_ob = Chr(line.split("\t")[0], self) + chr_ob.add_marker(line.split("\t")) + +class Chr(object): + def __init__(self, name, geno_ob): + self.name = name + self.loci = [] + self.mb_exists = geno_ob.mb_exists + self.cm_column = geno_ob.cm_column + self.mb_column = geno_ob.mb_column + self.geno_ob = geno_ob + + def __iter__(self): + return iter(self.loci) + + def __getitem__(self, index): + return self.loci[index] + + def __len__(self): + return len(self.loci) + + def add_marker(self, marker_row): + self.loci.append(Locus(marker_row, self.geno_ob)) + +class Locus(object): + def __init__(self, marker_row, geno_ob): + self.chr = marker_row[0] + self.name = marker_row[1] + self.cM = float(marker_row[geno_ob.cm_column]) + self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + + geno_table = { + geno_ob.mat: -1, + geno_ob.pat: 1, + geno_ob.het: 0, + geno_ob.unk: "U" + } + + self.genotype = [] + if geno_ob.mb_exists: + start_pos = 4 + else: + start_pos = 3 + for allele in marker_row[start_pos:]: + if allele in geno_table.keys(): + self.genotype.append(geno_table[allele]) + else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown + self.genotype.append("U") \ No newline at end of file -- cgit v1.2.3