Merge /home/gn2/gene

author: Lei Yan 2016-06-16 18:21:01 +0000
committer: Lei Yan 2016-06-16 18:21:01 +0000
commit: 4fec0e6fc0772785a30451d417082bc189f2f6dd (patch)
tree: 6548c2d088d5a80561e23df076456caaeda195c7 /wqflask/utility/genofile_parser.py
parent: e55f38a72d47fbdf5f652a08e8da1db78f1dcdb5 (diff)
parent: d90dc3748557d1d6fbaa59f71fe676b8a7c393ca (diff)
download: genenetwork2-4fec0e6fc0772785a30451d417082bc189f2f6dd.tar.gz
1 files changed, 100 insertions, 0 deletions
diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py
new file mode 100644
index 00000000..67b84dc9
--- /dev/null
+++ b/wqflask/utility/genofile_parser.py
@@ -0,0 +1,100 @@
+# Genotype file parser for GN2
+# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+
+from __future__ import print_function, division, absolute_import
+import sys
+import os
+import glob
+import traceback
+import gzip
+
+
+import simplejson as json
+
+from pprint import pformat as pf
+
+class Marker(object):
+  """Plain record for one marker row: name, chr, cM, Mb and a list of genotype codes."""
+
+  def __init__(self):
+    # Attributes are created in the order name, chr, cM, Mb, genotypes.
+    self.name = self.chr = self.cM = self.Mb = None
+    self.genotypes = list()
+
+
+class EmptyConfigurations(Exception):
+  """Raised when a marker row is reached before any '@' symbol row was read."""
+
+
+class ConvertGenoFile(object):
+  """Parse a tab-separated genotype file into a list of marker dictionaries.
+
+  Rows are classified in the order process_rows() tests them:
+    * blank rows and rows starting with '#' are skipped;
+    * a row starting with 'Chr' is the column header: it is searched for the
+      optional 'cM' and 'Mb' columns and supplies the individual names;
+    * rows starting with '@' ('@mat:...', '@pat:...', '@het:...', '@unk:...')
+      declare which symbol in the file stands for which haplotype class;
+    * every other row is treated as a marker row.
+  """
+
+  def __init__(self, input_file):
+    """Open input_file for reading and reset all parser state.
+
+    The file stays open until process_rows() has read its last line or
+    close() is called; the instance can also be used as a context manager.
+    """
+    self.mb_exists = False
+    self.cm_exists = False
+    self.markers = []
+    # Filled from the 'Chr' header row; created here so the attribute
+    # exists even when the file has no header row.
+    self.individuals = []
+
+    self.latest_row_pos = None
+    self.latest_col_pos = None
+
+    self.latest_row_value = None
+    self.latest_col_value = None
+    self.input_fh = open(input_file)
+    print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!")
+    # Output code stored for each haplotype class a file may declare.
+    self.haplotype_notation = {
+      '@mat': "1",
+      '@pat': "2",
+      '@het': "-999",
+      '@unk': "-999"
+    }
+    # Maps a symbol declared on an '@' row (the text after ':') to its
+    # output code from haplotype_notation.
+    self.configurations = {}
+
+  def close(self):
+    """Close the input file; calling it more than once is harmless."""
+    self.input_fh.close()
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, exc_type, exc_value, exc_tb):
+    self.close()
+
+  def process_rows(self):
+    """Yield the marker rows of the file, consuming header rows on the way.
+
+    Updates latest_row_pos and latest_row_value for every line read, and
+    mb_exists, cm_exists, individuals and configurations from the header
+    rows.  The file handle is closed once the last line has been read;
+    calls made after that yield nothing.
+
+    Raises:
+      EmptyConfigurations: a marker row was reached while configurations
+        was still empty.
+    """
+    if self.input_fh.closed:
+      return
+    for self.latest_row_pos, row in enumerate(self.input_fh):
+      self.latest_row_value = row
+      # Blank lines and '#' comment lines carry no data.
+      if not row.strip() or row.startswith('#'):
+        continue
+      if row.startswith('Chr'):
+        columns = row.split()
+        if 'Mb' in columns:
+          self.mb_exists = True
+        if 'cM' in columns:
+          self.cm_exists = True
+        # Individuals start after the two leading columns plus whichever
+        # of the cM / Mb columns are present.
+        skip = 2 + int(self.cm_exists) + int(self.mb_exists)
+        self.individuals = columns[skip:]
+        continue
+      if row.startswith('@'):
+        key, _separator, value = row.partition(':')
+        key = key.strip()
+        value = value.strip()
+        # '@' keys that are not in haplotype_notation are ignored.
+        if key in self.haplotype_notation:
+          self.configurations[value] = self.haplotype_notation[key]
+        continue
+      if not self.configurations:
+        raise EmptyConfigurations(
+          "marker row reached before any '@' genotype symbol was declared")
+      yield row
+    # Whole file consumed: release the handle instead of leaking it.
+    self.input_fh.close()
+
+  def process_csv(self):
+    """Parse every marker row and append one dictionary per row to self.markers.
+
+    Each dictionary is the attribute dict of a Marker (keys name, chr, cM,
+    Mb, genotypes).  Genotype symbols are looked up in configurations after
+    upper-casing and stripping; symbols that are not found are reported on
+    stdout and stored as "NA".
+    """
+    for row in self.process_rows():
+      # Drop the line terminator so it cannot end up inside the last field.
+      row_items = row.rstrip("\r\n").split("\t")
+
+      this_marker = Marker()
+      this_marker.name = row_items[1]
+      this_marker.chr = row_items[0]
+      # Position columns are optional; when both are present the cM value
+      # is read from the column before the Mb value.
+      first_genotype = 2
+      if self.cm_exists:
+        this_marker.cM = row_items[first_genotype]
+        first_genotype += 1
+      if self.mb_exists:
+        this_marker.Mb = row_items[first_genotype]
+        first_genotype += 1
+      for genotype in row_items[first_genotype:]:
+        code = genotype.upper().strip()
+        if code in self.configurations:
+          this_marker.genotypes.append(self.configurations[code])
+        else:
+          print("WARNING:", code)
+          this_marker.genotypes.append("NA")
+      self.markers.append(this_marker.__dict__)
+
author	Lei Yan	2016-06-16 18:21:01 +0000
committer	Lei Yan	2016-06-16 18:21:01 +0000
commit	4fec0e6fc0772785a30451d417082bc189f2f6dd (patch)
tree	6548c2d088d5a80561e23df076456caaeda195c7 /wqflask/utility/genofile_parser.py
parent	e55f38a72d47fbdf5f652a08e8da1db78f1dcdb5 (diff)
parent	d90dc3748557d1d6fbaa59f71fe676b8a7c393ca (diff)
download	genenetwork2-4fec0e6fc0772785a30451d417082bc189f2f6dd.tar.gz