about summary refs log tree commit diff
path: root/wqflask/utility
diff options
context:
space:
mode:
author zsloan 2016-05-18 20:06:41 +0000
committer zsloan 2016-05-18 20:06:41 +0000
commit f8c89f7c24cbfcffdafd12ef2087f4de598ed4bd (patch)
tree ca233c2f08151377de161a616e69031d557ff808 /wqflask/utility
parent e36ffce0d972334b07ee91b817fc3f30ed3598c4 (diff)
parent a8945cc625f12e9cf733f469426bf0e4b8e83647 (diff)
download genenetwork2-f8c89f7c24cbfcffdafd12ef2087f4de598ed4bd.tar.gz
Merge branch 'staging' of https://github.com/genenetwork/genenetwork2
Diffstat (limited to 'wqflask/utility')
-rw-r--r-- wqflask/utility/external.py 9
-rw-r--r-- wqflask/utility/genofile_parser.py 100
-rw-r--r-- wqflask/utility/tools.py 189
3 files changed, 230 insertions, 68 deletions
diff --git a/wqflask/utility/external.py b/wqflask/utility/external.py
new file mode 100644
index 00000000..50afea08
--- /dev/null
+++ b/wqflask/utility/external.py
@@ -0,0 +1,9 @@
+# Call external program
+
+import os
+import sys
+import subprocess
+
+def shell(command):
+    if subprocess.call(command, shell=True) != 0:
+        raise Exception("ERROR: failed on "+command)
diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py
new file mode 100644
index 00000000..67b84dc9
--- /dev/null
+++ b/wqflask/utility/genofile_parser.py
@@ -0,0 +1,100 @@
+# CTL analysis for GN2
+# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+
+from __future__ import print_function, division, absolute_import
+import sys
+import os
+import glob
+import traceback
+import gzip
+
+
+import simplejson as json
+
+from pprint import pformat as pf
+
+class Marker(object):
+  def __init__(self):
+    self.name = None
+    self.chr = None
+    self.cM = None
+    self.Mb = None
+    self.genotypes = []
+
+
+class ConvertGenoFile(object):
+
+  def __init__(self, input_file):
+    self.mb_exists = False
+    self.cm_exists = False
+    self.markers = []
+    
+    self.latest_row_pos = None
+    self.latest_col_pos = None
+    
+    self.latest_row_value = None
+    self.latest_col_value = None
+    self.input_fh = open(input_file)
+    print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!")
+    self.haplotype_notation = {
+      '@mat': "1",
+      '@pat': "2",
+      '@het': "-999",
+      '@unk': "-999"
+    }
+    self.configurations = {}
+
+  def process_rows(self):
+    for self.latest_row_pos, row in enumerate(self.input_fh):
+        self.latest_row_value = row
+        # Take care of headers
+        if not row.strip():
+            continue
+        if row.startswith('#'):
+            continue
+        if row.startswith('Chr'):
+            if 'Mb' in row.split():
+                self.mb_exists = True
+            if 'cM' in row.split():
+                self.cm_exists = True
+            skip = 2 + self.cm_exists + self.mb_exists
+            self.individuals = row.split()[skip:]
+            continue
+        if row.startswith('@'):
+            key, _separater, value = row.partition(':')
+            key = key.strip()
+            value = value.strip()
+            if key in self.haplotype_notation:
+                self.configurations[value] = self.haplotype_notation[key]
+            continue
+        if not len(self.configurations):
+            raise EmptyConfigurations
+        yield row
+
+  def process_csv(self):
+    for row_count, row in enumerate(self.process_rows()):
+      row_items = row.split("\t")
+
+      this_marker = Marker()
+      this_marker.name = row_items[1]
+      this_marker.chr = row_items[0]
+      if self.cm_exists and self.mb_exists:
+        this_marker.cM = row_items[2]
+        this_marker.Mb = row_items[3]
+        genotypes = row_items[4:]
+      elif self.cm_exists:
+          this_marker.cM = row_items[2]
+          genotypes = row_items[3:]
+      elif self.mb_exists:
+          this_marker.Mb = row_items[2]
+          genotypes = row_items[3:]
+      else:
+        genotypes = row_items[2:]
+      for item_count, genotype in enumerate(genotypes):
+        if genotype.upper().strip() in self.configurations:
+          this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+        else:
+          print("WARNING:", genotype.upper())
+          this_marker.genotypes.append("NA")
+      self.markers.append(this_marker.__dict__)
+
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index b8a41f60..dd8c4a1e 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -1,84 +1,137 @@
 # Tools/paths finder resolves external paths from settings and/or environment
 # variables
-#
-# Currently supported:
-#
-#   PYLMM_PATH finds the root of the git repository of the pylmm_gn2 tool 
 
 import os
 import sys
 from wqflask import app
 
-def get_setting(id,default,guess,get_valid_path):
-    """
-    Resolve a setting from the environment or the global settings in app.config
+def get_setting(command_id,guess=None):
+    """Resolve a setting from the environment or the global settings in
+    app.config. The value found is returned without validation; the
+    wrappers below (e.g. pylmm_command, tempdir) check it with
+    valid_bin or valid_path.
+
+      guess = os.environ.get('HOME')+'/pylmm'
+      get_setting('PYLMM_PATH',guess)
+
+    first tries the environment variable in +command_id+, next gets
+    the Flask app setting for the same +command_id+ and finally does
+    an educated +guess+.
+
+    In all, the environment overrides the others, next is the flask
+    setting, then the guess. The first non-empty value found is
+    returned. If none is resolved an exception is raised.
+
+    Note that we do not use the system path. This is on purpose
+    because it will mess up controlled (reproducible) deployment. The
+    proper way is to either use the GNU Guix defaults as listed in
+    etc/default_settings.py or override them yourself by creating a
+    different settings.py file (or setting the environment).
+
     """
+    def value(command):
+        if command:
+            sys.stderr.write("Found path "+command+"\n")
+            return command
+        else:
+            return None
+    
     # ---- Check whether environment exists
-    path = get_valid_path(os.environ.get(id))
-    # ---- Check whether setting exists
-    setting = app.config.get(id)
-    if not path:
-        path = get_valid_path(setting)
-    # ---- Check whether default exists
-    if not path:
-        path = get_valid_path(default)
-    # ---- Guess directory
-    if not path:
-        if not setting:
-            setting = guess
-        path = get_valid_path(guess)
-    if not path:
-        raise Exception(id+' '+setting+' path unknown or faulty (update settings.py?). '+id+' should point to the root of the git repository')
-
-    return path
-
-def pylmm_command(default=None):
+    sys.stderr.write("Looking for "+command_id+"\n")
+    command = value(os.environ.get(command_id))
+    if not command:
+        # ---- Check whether setting exists in app
+        command = value(app.config.get(command_id))
+        if not command:
+            command = value(guess)
+            if not command:
+                raise Exception(command_id+' path unknown or faulty (update settings.py?). '+command_id+' should point to the path')
+    return command
+
+def valid_bin(bin):
+    if os.path.islink(bin) or valid_file(bin):
+        return bin
+    return None
+
+def valid_file(fn):
+    if os.path.isfile(fn):
+        return fn
+    return None
+
+def valid_path(dir):
+    if os.path.isdir(dir):
+        return dir
+    return None
+
+def pylmm_command(guess=None):
+    return valid_bin(get_setting("PYLMM_COMMAND",guess))
+
+def gemma_command(guess=None):
+    return valid_bin(get_setting("GEMMA_COMMAND",guess))
+
+def plink_command(guess=None):
+    return valid_bin(get_setting("PLINK_COMMAND",guess))
+
+def flat_files(subdir=None):
+    base = get_setting("GENENETWORK_FILES")
+    if subdir:
+        return assert_dir(base+"/"+subdir)
+    return assert_dir(base)
+
+def assert_dir(dir):
+    if not valid_path(dir):
+        raise Exception("ERROR: can not find directory "+dir)
+    return dir
+
+def mk_dir(dir):
+    if not valid_path(dir):
+        os.makedirs(dir)
+    return assert_dir(dir)
+
+def locate(name, subdir=None):
     """
-    Return the path to the repository and the python command to call
+    Locate a static flat file in the GENENETWORK_FILES environment.
+
+    This function throws an error when the file is not found.
     """
-    def get_valid_path(path):
-        """Test for a valid repository"""
-        if path:
-            sys.stderr.write("Trying PYLMM_PATH in "+path+"\n")
-        if path and os.path.isfile(path+'/pylmm_gn2/lmm.py'):
-            return path
+    base = get_setting("GENENETWORK_FILES")
+    if subdir:
+        base = base+"/"+subdir
+    if valid_path(base):
+        lookfor = base + "/" + name
+        if valid_file(lookfor):
+            print("Found: file "+lookfor+"\n")
+            return lookfor
         else:
-            None
+            raise Exception("Can not locate "+lookfor)
+    if subdir: sys.stderr.write(subdir)
+    raise Exception("Can not locate "+name+" in "+base)
 
-    guess = os.environ.get('HOME')+'/pylmm_gn2'
-    path = get_setting('PYLMM_PATH',default,guess,get_valid_path)
-    pylmm_command = 'python '+path+'/pylmm_gn2/lmm.py'
-    return path,pylmm_command
-
-def plink_command(default=None):
+def locate_ignore_error(name, subdir=None):
     """
-    Return the path to the repository and the python command to call
+    Locate a static flat file in the GENENETWORK_FILES environment.
+
+    This function does not throw an error when the file is not found
+    but returns None.
     """
-    def get_valid_path(path):
-        """Test for a valid repository"""
-        if path:
-            sys.stderr.write("Trying PLINK_PATH in "+path+"\n")
-        if path and os.path.isfile(path+'/plink'):
-            return path
-        else:
-            None
-
-    guess = os.environ.get('HOME')+'/plink_gemma'
-    path = get_setting('PLINK_PATH',default,guess,get_valid_path)
-    plink_command = path+'/plink'
-    return path,plink_command
-
-def gemma_command(default=None):
-    def get_valid_path(path):
-        """Test for a valid repository"""
-        if path:
-            sys.stderr.write("Trying PLINK_PATH in "+path+"\n")
-        if path and os.path.isfile(path+'/plink'):
-            return path
-        else:
-            None
+    base = get_setting("GENENETWORK_FILES")
+    if subdir:
+        base = base+"/"+subdir
+    if valid_path(base):
+        lookfor = base + "/" + name
+        if valid_file(lookfor):
+            print("Found: file "+name+"\n")
+            return lookfor
+    sys.stderr.write("WARNING: file "+name+" not found\n")
+    return None
+
+def tempdir():
+    return valid_path(get_setting("TEMPDIR","/tmp"))
 
-    guess = os.environ.get('HOME')+'/plink'
-    path = get_setting('PLINK_PATH',default,guess,get_valid_path)
-    gemma_command = path+'/gemma'
-    return path, gemma_command
\ No newline at end of file
+    
+# Cached values
+PYLMM_COMMAND = pylmm_command()
+GEMMA_COMMAND = gemma_command()
+PLINK_COMMAND = plink_command()
+FLAT_FILES    = flat_files()
+TEMPDIR       = tempdir()