# Genotype (.geno) file reader used in place of reaper.Dataset.
from __future__ import absolute_import, division, print_function

import utility.logger
logger = utility.logger.getLogger(__name__ )

class genotype(object):
    """
    Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure

    The object behaves like a sequence of Chr objects (iteration, indexing
    and len() all delegate to ``self.chromosomes``).

    Attributes set while reading the file:
        group: value of the ``@name`` metadata line (None if absent).
        type: value of the ``@type`` metadata line (defaults to "riset").
        prgy: column names that follow the position columns in the header
            row (presumably the sample/progeny names).
        nprgy: len(prgy).
        mat, pat, het, unk: genotype symbols from the matching ``@`` lines.
            The defaults (-1, 1, 0, "U") are kept when a line is absent.
        filler: True when the file contains ``@filler:yes``.
        mb_exists: True when the header row has an "Mb" column.
        cm_column, mb_column: indexes of the cM / Mb columns in marker rows.
        chromosomes: list of Chr objects in file order.
    """

    def __init__(self, filename):
        self.group = None
        self.type = "riset"
        self.prgy = []
        self.nprgy = 0
        self.mat = -1
        self.pat = 1
        self.het = 0
        self.unk = "U"
        self.filler = False
        self.mb_exists = False

        #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary
        self.cm_column = 2
        self.mb_column = 3

        self.chromosomes = []

        self.read_file(filename)

    def __iter__(self):
        return iter(self.chromosomes)

    def __getitem__(self, index):
        return self.chromosomes[index]

    def __len__(self):
        return len(self.chromosomes)

    def read_rdata_output(self, qtl_results):
        """Rebuild ``self.chromosomes`` from a sequence of marker results.

        qtl_results is an iterable of mappings. Each must have a 'chr' key
        and may have 'name', 'Mb' and 'cM' keys, which are copied onto the
        Locus created for that marker.

        Returns self. An empty qtl_results leaves ``self.chromosomes`` empty.
        """
        #ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results
        self.chromosomes = [] #ZS: Overwriting since the .geno file's contents are just placeholders

        this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers
        chr_ob = None
        for marker in qtl_results:
            locus = Locus(self)
            marker_chr = str(marker['chr'])
            # Once the current chromosome is "X" no further chromosome is
            # started; every remaining marker is added to X.
            if marker_chr != this_chr and this_chr != "X": #ZS: This is really awkward but works as a temporary fix
                if this_chr != "":
                    self.chromosomes.append(chr_ob)
                # Chromosome "20" is renamed to "X" for the Chr object
                # (the locus itself keeps the original label).
                this_chr = "X" if marker_chr == "20" else marker_chr
                chr_ob = Chr(this_chr, self)
            locus.chr = marker_chr
            if 'name' in marker:
                locus.name = marker['name']
            if 'Mb' in marker:
                locus.Mb = marker['Mb']
            if 'cM' in marker:
                locus.cM = marker['cM']
            chr_ob.loci.append(locus)

        # Only flush a chromosome that was actually started, so empty input
        # does not leave a None entry behind.
        if chr_ob is not None:
            self.chromosomes.append(chr_ob)

        return self

    def read_file(self, filename):
        """Parse the tab-separated genotype file at *filename*.

        Lines starting with "#" are comments, lines starting with "@" are
        metadata, the line starting with "Chr" is the header row, and every
        other non-blank line is a marker row. Marker rows are grouped into a
        new Chr each time the first column changes.
        """
        this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers
        chr_ob = None
        with open(filename, 'r') as geno_file:
            for raw_line in geno_file:
                # Drop the line terminator so the last column of the header
                # and of each marker row is not polluted with "\n".
                line = raw_line.rstrip("\r\n")
                if not line.strip() or line[0] == "#":
                    continue

                if line[0] == "@":
                    self._read_metadata(line)
                elif line[:3] == "Chr":
                    self._read_header(line.split("\t"))
                else:
                    marker_row = line.split("\t")
                    if marker_row[0] != this_chr:
                        if this_chr != "":
                            self.chromosomes.append(chr_ob)
                        this_chr = marker_row[0]
                        chr_ob = Chr(this_chr, self)
                    chr_ob.add_marker(marker_row)

        # A file without marker rows yields no chromosomes (not [None]).
        if chr_ob is not None:
            self.chromosomes.append(chr_ob)

    def _read_metadata(self, line):
        """Apply one ``@label:value`` metadata line; unknown labels are ignored."""
        fields = line.split(":")
        if len(fields) < 2:
            return
        label = fields[0][1:]
        value = fields[1].strip()

        if label == "name":
            self.group = value
        elif label == "filler":
            if value == "yes":
                self.filler = True
        elif label in ("type", "mat", "pat", "het", "unk"):
            setattr(self, label, value)

    def _read_header(self, header_row):
        """Work out the cM/Mb column layout and the trailing column names."""
        third = header_row[2] if len(header_row) > 2 else None
        fourth = header_row[3] if len(header_row) > 3 else None

        if third == "Mb":
            self.mb_exists = True
            self.mb_column = 2
            self.cm_column = 3
        elif fourth == "Mb":
            self.mb_exists = True
            self.mb_column = 3
        elif third == "cM":
            self.cm_column = 2

        # With an Mb column there are four leading columns, otherwise three.
        first_sample = 4 if self.mb_exists else 3
        self.prgy = header_row[first_sample:]
        self.nprgy = len(self.prgy)

class Chr(object):
    """A named chromosome holding an ordered list of Locus objects.

    Iteration, indexing and len() all operate on ``self.loci``.
    """

    def __init__(self, name, geno_ob):
        self.name = name
        self.loci = []
        # Copy the position-column layout from the owning genotype object as
        # it stands at the moment this chromosome is created.
        self.mb_exists, self.cm_column, self.mb_column = (
            geno_ob.mb_exists,
            geno_ob.cm_column,
            geno_ob.mb_column,
        )
        self.geno_ob = geno_ob

    def __iter__(self):
        markers = self.loci
        return iter(markers)

    def __getitem__(self, index):
        markers = self.loci
        return markers[index]

    def __len__(self):
        markers = self.loci
        return len(markers)

    def add_marker(self, marker_row):
        """Build a Locus from one split marker row and append it to ``loci``."""
        new_locus = Locus(self.geno_ob, marker_row)
        self.loci.append(new_locus)

class Locus(object):
    """A single marker: chromosome, name, cM/Mb positions and genotype calls.

    When *marker_row* is omitted (or empty) every field keeps its default
    (None, or [] for ``genotype``) so the caller can fill them in by hand.

    Otherwise *marker_row* is a split marker line: chromosome, marker name,
    the position column(s), then one allele symbol per remaining column.
    *geno_ob* supplies the column layout (``cm_column``, ``mb_column``,
    ``mb_exists``) and the allele symbols (``mat``, ``pat``, ``het``, ``unk``).

    ``genotype`` holds -1 for the ``mat`` symbol, 1 for ``pat``, 0 for ``het``
    and "U" for ``unk`` or any symbol that is not recognised.
    """

    def __init__(self, geno_ob, marker_row = None):
        self.chr = None
        self.name = None
        self.cM = None
        self.Mb = None
        self.genotype = []
        if not marker_row:
            return

        self.chr = marker_row[0]
        self.name = marker_row[1]

        # Only conversion/lookup failures trigger the fallbacks; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        position_errors = (IndexError, TypeError, ValueError)
        try:
            self.cM = float(marker_row[geno_ob.cm_column])
        except position_errors:
            # No usable cM value: reuse the Mb position if there is one, else 0
            self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0
        try:
            self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None
        except position_errors:
            self.Mb = self.cM

        geno_table = {
            geno_ob.mat: -1,
            geno_ob.pat: 1,
            geno_ob.het: 0,
            geno_ob.unk: "U"
        }

        # With an Mb column there are four leading columns, otherwise three
        start_pos = 4 if geno_ob.mb_exists else 3

        for allele in marker_row[start_pos:]:
            # The last column of a line read from a file still carries its
            # newline; strip it so the symbol can be matched.
            if hasattr(allele, "strip"):
                allele = allele.strip()
            #ZS: Some genotype appears that isn't specified in the metadata, make it unknown
            self.genotype.append(geno_table.get(allele, "U"))