about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- wqflask/base/data_set.py 7
-rw-r--r-- wqflask/utility/gen_geno_ob.py 135
2 files changed, 140 insertions, 2 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index beb2a8a2..b324ac74 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -43,6 +43,7 @@ from db import webqtlDatabaseFunction
 from utility import webqtlUtil
 from utility.benchmark import Bench
 from utility import chunks
+from utility import gen_geno_ob
 from utility.tools import locate, locate_ignore_error, flat_files
 
 from maintenance import get_group_samplelists
@@ -388,14 +389,16 @@ class DatasetGroup(object):
         #genotype_1 is Dataset Object without parents and f1
         #genotype_2 is Dataset Object with parents and f1 (not for intercross)
 
-        genotype_1 = reaper.Dataset()
+        #genotype_1 = reaper.Dataset()
 
         # reaper barfs on unicode filenames, so here we ensure it's a string
         if self.genofile:
             full_filename = str(locate(self.genofile, 'genotype'))
         else:
             full_filename = str(locate(self.name + '.geno', 'genotype'))
-        genotype_1.read(full_filename)
+        #genotype_1.read(full_filename)
+
+        genotype_1 = gen_geno_ob.genotype(full_filename)
 
         if genotype_1.type == "group" and self.parlist:
             genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1])       #, F1=_f1)
diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py
new file mode 100644
index 00000000..5824b0b3
--- /dev/null
+++ b/wqflask/utility/gen_geno_ob.py
@@ -0,0 +1,135 @@
+from __future__ import absolute_import, division, print_function

+

class genotype(object):
    """
    Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure

    Parses a tab-delimited .geno file into a list of Chr objects, one per run
    of consecutive marker rows that share the same chromosome name (first
    column). Iterating, indexing and len() all operate on that list.

    Attributes filled from "@label:value" metadata lines:
        group               -- value of "@name"
        type                -- value of "@type" (default "riset")
        mat, pat, het, unk  -- allele symbols that Locus decodes to
                               -1, 1, 0 and "U" respectively
        filler              -- True when the file contains "@filler:yes"
    Attributes filled from the header row (the line starting with "Chr"):
        prgy, nprgy         -- progeny column names and how many there are
        mb_exists           -- True when the header has an "Mb" column
        mb_column, cm_column -- indexes of the Mb / cM position columns

    NOTE(review): only parsing is implemented here; other reaper.Dataset
    methods (e.g. add()) are not provided -- confirm callers do not rely
    on them.
    """

    # Metadata labels whose value is stored verbatim, mapped to the attribute
    # that receives it. "filler" is handled separately because it is a flag.
    _METADATA_ATTRS = {
        "name": "group",
        "type": "type",
        "mat": "mat",
        "pat": "pat",
        "het": "het",
        "unk": "unk",
    }

    def __init__(self, filename):
        """Set the defaults, then populate the object from *filename*."""
        self.group = None
        self.type = "riset"
        self.prgy = []
        self.nprgy = 0
        self.mat = -1
        self.pat = 1
        self.het = 0
        self.unk = "U"
        self.filler = False
        self.mb_exists = False

        #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary
        self.cm_column = 2
        self.mb_column = 3

        self.chromosomes = []

        self.read_file(filename)

    def __iter__(self):
        return iter(self.chromosomes)

    def __getitem__(self, index):
        return self.chromosomes[index]

    def __len__(self):
        return len(self.chromosomes)

    def read_file(self, filename):
        """
        Read *filename* and fill in the metadata, header info and chromosomes.

        Lines starting with "#" and blank lines are ignored, lines starting
        with "@" are metadata, the line starting with "Chr" is the column
        header, and every other line is a marker row.
        """
        this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers
        chr_ob = None
        with open(filename, 'r') as geno_file:
            for raw_line in geno_file:
                # Drop the line terminator first; otherwise it ends up glued
                # to metadata values, the last progeny name and the last
                # allele of every marker row.
                line = raw_line.rstrip("\r\n")
                if not line.strip() or line.startswith("#"):
                    continue

                if line.startswith("@"):
                    self._read_metadata(line)
                elif line.startswith("Chr"):
                    self._read_header(line.split("\t"))
                else:
                    marker_row = line.split("\t")
                    if chr_ob is None or marker_row[0] != this_chr:
                        if chr_ob is not None:
                            self.chromosomes.append(chr_ob)
                        this_chr = marker_row[0]
                        chr_ob = Chr(this_chr, self)
                    chr_ob.add_marker(marker_row)

        # The loop only stores a chromosome when the next one starts, so the
        # final chromosome has to be stored once the file is exhausted.
        if chr_ob is not None:
            self.chromosomes.append(chr_ob)

    def _read_metadata(self, line):
        """Apply one "@label:value" line; unknown or value-less labels are ignored."""
        fields = line[1:].split(":")
        if len(fields) < 2:
            return
        label = fields[0].strip()
        value = fields[1].strip()

        if label == "filler":
            if value == "yes":
                self.filler = True
        elif label in self._METADATA_ATTRS:
            setattr(self, self._METADATA_ATTRS[label], value)

    def _read_header(self, header_row):
        """Record the position-column layout and progeny names from the header row."""
        third = header_row[2] if len(header_row) > 2 else None
        fourth = header_row[3] if len(header_row) > 3 else None

        if third == "Mb":
            self.mb_exists = True
            self.mb_column = 2
            self.cm_column = 3
        elif fourth == "Mb":
            self.mb_exists = True
            self.mb_column = 3
        elif third == "cM":
            self.cm_column = 2

        # Progeny names start right after the position column(s).
        if self.mb_exists:
            self.prgy = header_row[4:]
        else:
            self.prgy = header_row[3:]
        self.nprgy = len(self.prgy)

+                    

class Chr(object):
    """
    One chromosome of a genotype file: a name plus its Locus objects in the
    order they were added. Iteration, indexing and len() act on the loci.
    """

    def __init__(self, name, geno_ob):
        # Keep the parent object around; it is handed to every Locus so the
        # locus can look up the column layout and allele symbols.
        self.geno_ob = geno_ob
        self.name = name
        self.loci = []
        # Copy the column layout as it is at construction time.
        self.mb_exists, self.cm_column, self.mb_column = (
            geno_ob.mb_exists,
            geno_ob.cm_column,
            geno_ob.mb_column,
        )

    def __len__(self):
        return len(self.loci)

    def __getitem__(self, index):
        return self.loci[index]

    def __iter__(self):
        return iter(self.loci)

    def add_marker(self, marker_row):
        """Build a Locus from the split marker row and append it to this chromosome."""
        new_locus = Locus(marker_row, self.geno_ob)
        self.loci.append(new_locus)

+

class Locus(object):
    """
    One marker row of a genotype file.

    marker_row is the row already split on tabs: chromosome, marker name,
    position column(s), then one allele symbol per progeny. geno_ob supplies
    the column layout (cm_column, mb_column, mb_exists) and the allele symbols
    (mat, pat, het, unk).

    Attributes:
        chr, name -- first two columns of the row
        cM        -- float from the cM column
        Mb        -- float from the Mb column, or None when geno_ob.mb_exists
                     is false
        genotype  -- list with -1 / 1 / 0 / "U" for each progeny allele that
                     equals geno_ob.mat / pat / het / unk; any other symbol
                     becomes "U"
    """

    def __init__(self, marker_row, geno_ob):
        self.chr = marker_row[0]
        self.name = marker_row[1]
        self.cM = float(marker_row[geno_ob.cm_column])
        self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None

        # Symbols are normalised on both sides of the lookup so that a line
        # terminator or stray space left on a metadata value or on the last
        # allele of the row cannot turn a valid genotype into "U".
        geno_table = {
            self._clean(geno_ob.mat): -1,
            self._clean(geno_ob.pat): 1,
            self._clean(geno_ob.het): 0,
            self._clean(geno_ob.unk): "U",
        }

        # Alleles start after the position column(s).
        start_pos = 4 if geno_ob.mb_exists else 3

        #ZS: Some genotype appears that isn't specified in the metadata, make it unknown
        self.genotype = [
            geno_table.get(self._clean(allele), "U")
            for allele in marker_row[start_pos:]
        ]

    @staticmethod
    def _clean(symbol):
        """Strip surrounding whitespace from string symbols; pass anything else through."""
        return symbol.strip() if hasattr(symbol, "strip") else symbol
\ No newline at end of file