From 66c6bbfcbbd5fb23145ec09956f4809e5f701bec Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 5 Jun 2019 13:01:36 -0500 Subject: Fixed issue that caused interval mapping to not work because the python implementation of the reaper Dataset object doesn't include the addinterval method (so for those situations I still use reaper) Fixed issue where the last chromosome wasn't displayed for mapping results (though still need to fix issue where points are drawn too far to the right when a specific range is viewed) --- wqflask/base/data_set.py | 9 +- wqflask/utility/gen_geno_ob.py | 270 +++++++++++---------- .../marker_regression/display_mapping_results.py | 6 +- .../wqflask/marker_regression/qtlreaper_mapping.py | 2 +- 4 files changed, 147 insertions(+), 140 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index b324ac74..1fd1792e 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -384,7 +384,7 @@ class DatasetGroup(object): [result.extend(l) for l in lists if l] return result - def read_genotype_file(self): + def read_genotype_file(self, use_reaper=False): '''Read genotype from .geno file instead of database''' #genotype_1 is Dataset Object without parents and f1 #genotype_2 is Dataset Object with parents and f1 (not for intercross) @@ -396,9 +396,12 @@ class DatasetGroup(object): full_filename = str(locate(self.genofile, 'genotype')) else: full_filename = str(locate(self.name + '.geno', 'genotype')) - #genotype_1.read(full_filename) - genotype_1 = gen_geno_ob.genotype(full_filename) + if use_reaper: + genotype_1 = reaper.Dataset() + genotype_1.read(full_filename) + else: + genotype_1 = gen_geno_ob.genotype(full_filename) if genotype_1.type == "group" and self.parlist: genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 5824b0b3..5172369f 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,135 +1,137 @@ -from __future__ import absolute_import, division, print_function - -class genotype(object): - """ - Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure - """ - - def __init__(self, filename): - self.group = None - self.type = "riset" - self.prgy = [] - self.nprgy = 0 - self.mat = -1 - self.pat = 1 - self.het = 0 - self.unk = "U" - self.filler = False - self.mb_exists = False - - #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary - self.cm_column = 2 - self.mb_column = 3 - - self.chromosomes = [] - - self.read_file(filename) - - def __iter__(self): - return iter(self.chromosomes) - - def __getitem__(self, index): - return self.chromosomes[index] - - def __len__(self): - return len(self.chromosomes) - - def read_file(self, filename): - - with open(filename, 'r') as geno_file: - lines = geno_file.readlines() - - this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers - chr_ob = None - for line in lines: - if line[0] == "#": - continue - elif line[0] == "@": - label = line.split(":")[0][1:] - if label == "name": - self.group = line.split(":")[1] - elif label == "filler": - if line.split(":")[1] == "yes": - self.filler = True - elif label == "type": - self.type = line.split(":")[1] - elif label == "mat": - self.mat = line.split(":")[1] - elif label == "pat": - self.pat = line.split(":")[1] - elif label == "het": - self.het = line.split(":")[1] - elif label == "unk": - self.unk = line.split(":")[1] - else: - continue - elif line[:3] == "Chr": - header_row = line.split("\t") - if header_row[2] == "Mb": - self.mb_exists = True - self.mb_column = 2 - self.cm_column = 3 - elif header_row[3] == "Mb": - self.mb_exists = True - self.mb_column = 3 - elif header_row[2] == "cM": - self.cm_column = 2 - - if self.mb_exists: - self.prgy = header_row[4:] - else: - self.prgy = header_row[3:] - self.nprgy = len(self.prgy) - else: - if line.split("\t")[0] != this_chr: - if this_chr != "": - self.chromosomes.append(chr_ob) - this_chr = line.split("\t")[0] - chr_ob = Chr(line.split("\t")[0], self) - chr_ob.add_marker(line.split("\t")) - -class Chr(object): - def __init__(self, name, geno_ob): - self.name = name - self.loci = [] - self.mb_exists = geno_ob.mb_exists - self.cm_column = geno_ob.cm_column - self.mb_column = geno_ob.mb_column - self.geno_ob = geno_ob - - def __iter__(self): - return iter(self.loci) - - def __getitem__(self, index): - return self.loci[index] - - def __len__(self): - return len(self.loci) - - def add_marker(self, marker_row): - self.loci.append(Locus(marker_row, self.geno_ob)) - -class Locus(object): - def __init__(self, marker_row, geno_ob): - self.chr = marker_row[0] - self.name = marker_row[1] - self.cM = float(marker_row[geno_ob.cm_column]) - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None - - geno_table = { - geno_ob.mat: -1, - geno_ob.pat: 1, - geno_ob.het: 0, - geno_ob.unk: "U" - } - - self.genotype = [] - if geno_ob.mb_exists: - start_pos = 4 - else: - start_pos = 3 - for allele in marker_row[start_pos:]: - if allele in geno_table.keys(): - self.genotype.append(geno_table[allele]) - else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown +from __future__ import absolute_import, division, print_function + +class genotype(object): + """ + Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure + """ + + def __init__(self, filename): + self.group = None + self.type = "riset" + self.prgy = [] + self.nprgy = 0 + self.mat = -1 + self.pat = 1 + self.het = 0 + self.unk = "U" + self.filler = False + self.mb_exists = False + + #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary + self.cm_column = 2 + self.mb_column = 3 + + self.chromosomes = [] + + self.read_file(filename) + + def __iter__(self): + return iter(self.chromosomes) + + def __getitem__(self, index): + return self.chromosomes[index] + + def __len__(self): + return len(self.chromosomes) + + def read_file(self, filename): + + with open(filename, 'r') as geno_file: + lines = geno_file.readlines() + + this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + chr_ob = None + for line in lines: + if line[0] == "#": + continue + elif line[0] == "@": + label = line.split(":")[0][1:] + if label == "name": + self.group = line.split(":")[1] + elif label == "filler": + if line.split(":")[1] == "yes": + self.filler = True + elif label == "type": + self.type = line.split(":")[1] + elif label == "mat": + self.mat = line.split(":")[1] + elif label == "pat": + self.pat = line.split(":")[1] + elif label == "het": + self.het = line.split(":")[1] + elif label == "unk": + self.unk = line.split(":")[1] + else: + continue + elif line[:3] == "Chr": + header_row = line.split("\t") + if header_row[2] == "Mb": + self.mb_exists = True + self.mb_column = 2 + self.cm_column = 3 + elif header_row[3] == "Mb": + self.mb_exists = True + self.mb_column = 3 + elif header_row[2] == "cM": + self.cm_column = 2 + + if self.mb_exists: + self.prgy = header_row[4:] + else: + self.prgy = header_row[3:] + self.nprgy = len(self.prgy) + else: + if line.split("\t")[0] != this_chr: + if this_chr != "": + self.chromosomes.append(chr_ob) + this_chr = line.split("\t")[0] + chr_ob = Chr(line.split("\t")[0], self) + chr_ob.add_marker(line.split("\t")) + + self.chromosomes.append(chr_ob) + +class Chr(object): + def __init__(self, name, geno_ob): + self.name = name + self.loci = [] + self.mb_exists = geno_ob.mb_exists + self.cm_column = geno_ob.cm_column + self.mb_column = geno_ob.mb_column + self.geno_ob = geno_ob + + def __iter__(self): + return iter(self.loci) + + def __getitem__(self, index): + return self.loci[index] + + def __len__(self): + return len(self.loci) + + def add_marker(self, marker_row): + self.loci.append(Locus(marker_row, self.geno_ob)) + +class Locus(object): + def __init__(self, marker_row, geno_ob): + self.chr = marker_row[0] + self.name = marker_row[1] + self.cM = float(marker_row[geno_ob.cm_column]) + self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + + geno_table = { + geno_ob.mat: -1, + geno_ob.pat: 1, + geno_ob.het: 0, + geno_ob.unk: "U" + } + + self.genotype = [] + if geno_ob.mb_exists: + start_pos = 4 + else: + start_pos = 3 + for allele in marker_row[start_pos:]: + if allele in geno_table.keys(): + self.genotype.append(geno_table[allele]) + else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown self.genotype.append("U") \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 993fc2d9..e53e5279 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -236,9 +236,11 @@ class DisplayMappingResults(object): self.selectedChr = int(start_vars['selected_chr']) self.strainlist = start_vars['samples'] - self.genotype = self.dataset.group.read_genotype_file() + if self.mapping_method == "reaper" and self.manhattan_plot != True: - self.genotype = self.genotype.addinterval() + self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) + else: + self.genotype = self.dataset.group.read_genotype_file() #Darwing Options try: diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index 35bed8d8..d58c59c8 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -2,7 +2,7 @@ import utility.logger logger = utility.logger.getLogger(__name__ ) def gen_reaper_results(this_trait, dataset, samples_before, trait_vals, json_data, num_perm, bootCheck, num_bootstrap, do_control, control_marker, manhattan_plot): - genotype = dataset.group.read_genotype_file() + genotype = dataset.group.read_genotype_file(use_reaper=True) if manhattan_plot != True: genotype = genotype.addinterval() -- cgit v1.2.3